author    David Woodhouse <dwmw2@infradead.org>  2006-05-03 08:30:35 -0400
committer David Woodhouse <dwmw2@infradead.org>  2006-05-03 08:30:35 -0400
commit    edc4ff7c08e9885c40e60c4fb39fa42cc91a0602 (patch)
tree      f375d28043dd4457428a841167dc93d760ba9a46 /fs
parent    cbb9a56177b16294ed347ba7fcb1c66c8adb5dc4 (diff)
parent    e17df688f7064dae1417ce425dd1e4b71d24d63b (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig              |   6
-rw-r--r--  fs/block_dev.c          |   2
-rw-r--r--  fs/cifs/CHANGES         |   6
-rw-r--r--  fs/cifs/README          |   8
-rw-r--r--  fs/cifs/cifsfs.c        |  99
-rw-r--r--  fs/cifs/cifssmb.c       |   2
-rw-r--r--  fs/cifs/connect.c       |   6
-rw-r--r--  fs/cifs/dir.c           |  18
-rw-r--r--  fs/cifs/fcntl.c         |   2
-rw-r--r--  fs/cifs/file.c          |  34
-rw-r--r--  fs/cifs/inode.c         |   6
-rw-r--r--  fs/cifs/link.c          |   6
-rw-r--r--  fs/cifs/ntlmssp.c       |  14
-rw-r--r--  fs/cifs/readdir.c       |  45
-rw-r--r--  fs/cifs/xattr.c         |   8
-rw-r--r--  fs/compat.c             |  24
-rw-r--r--  fs/exec.c               |   2
-rw-r--r--  fs/ext3/ioctl.c         |  18
-rw-r--r--  fs/ext3/resize.c        |   3
-rw-r--r--  fs/fuse/dev.c           |  35
-rw-r--r--  fs/fuse/fuse_i.h        |  12
-rw-r--r--  fs/fuse/inode.c         |  40
-rw-r--r--  fs/lockd/svclock.c      |   2
-rw-r--r--  fs/locks.c              |   9
-rw-r--r--  fs/nfs/dir.c            |   5
-rw-r--r--  fs/nfs/direct.c         |   8
-rw-r--r--  fs/nfs/file.c           |   5
-rw-r--r--  fs/nfs/inode.c          |   5
-rw-r--r--  fs/nfs/nfs4proc.c       |  10
-rw-r--r--  fs/open.c               |  24
-rw-r--r--  fs/partitions/check.c   |   5
-rw-r--r--  fs/pipe.c               | 190
-rw-r--r--  fs/proc/base.c          |  21
-rw-r--r--  fs/reiserfs/xattr_acl.c |   5
-rw-r--r--  fs/splice.c             | 701
-rw-r--r--  fs/stat.c               |   2
36 files changed, 968 insertions(+), 420 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 2524629dc835..f9b5842c8d2d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -842,6 +842,12 @@ config TMPFS
 config HUGETLBFS
 	bool "HugeTLB file system support"
 	depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
+	help
+	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
+	  ramfs. For architectures that support it, say Y here and read
+	  <file:Documentation/vm/hugetlbpage.txt> for details.
+
+	  If unsure, say N.
 
 config HUGETLB_PAGE
 	def_bool HUGETLBFS
diff --git a/fs/block_dev.c b/fs/block_dev.c
index af88c43043d5..f5958f413bd1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_write_nolock,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
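
The two new entries wire the block device's file_operations into the splice machinery, so data can move between a block device and a pipe without a bounce through a user buffer. As a minimal userspace sketch of what this enables (the device path and byte count here are placeholders, not part of the patch):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int pipefd[2];
            ssize_t n;
            /* /dev/sda is a placeholder; any readable block device works */
            int fd = open("/dev/sda", O_RDONLY);

            if (fd < 0 || pipe(pipefd) < 0) {
                    perror("setup");
                    return 1;
            }
            /* move up to 64 KiB from the device into the pipe without copying
               through user space */
            n = splice(fd, NULL, pipefd[1], NULL, 65536, 0);
            if (n < 0)
                    perror("splice");
            else
                    fprintf(stderr, "spliced %zd bytes\n", n);
            return 0;
    }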
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8a2de038882e..1a27ecb46c9a 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,7 +1,11 @@
 Version 1.42
 ------------
 Fix slow oplock break when mounted to different servers at the same time and
-the tids match and we try to find matching fid on wrong server.
+the tids match and we try to find matching fid on wrong server. Fix read
+looping when signing required by server (2.6.16 kernel only). Fix readdir
+vs. rename race which could cause each to hang. Return . and .. even
+if server does not. Allow searches to skip first three entries and
+begin at any location. Fix oops in find_writeable_file.
 
 Version 1.41
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index b2b4d0803761..0355003f4f0a 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -511,6 +511,14 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
 		support and want to map the uid and gid fields
 		to values supplied at mount (rather than the
 		actual values, then set this to zero. (default 1)
+Experimental	When set to 1 used to enable certain experimental
+		features (currently enables multipage writes
+		when signing is enabled, the multipage write
+		performance enhancement was disabled when
+		signing turned on in case buffer was modified
+		just before it was sent, also this flag will
+		be used to use the new experimental sessionsetup
+		code).
 
 These experimental features and tracing can be enabled by changing flags in
 /proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d4b713e5affb..c262d8874ce9 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -33,6 +33,7 @@
 #include <linux/vfs.h>
 #include <linux/mempool.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #define DECLARE_GLOBALS_HERE
@@ -75,9 +76,6 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ;
 module_param(cifs_max_pending, int, 0);
 MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256");
 
-static DECLARE_COMPLETION(cifs_oplock_exited);
-static DECLARE_COMPLETION(cifs_dnotify_exited);
-
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
@@ -841,10 +839,6 @@ static int cifs_oplock_thread(void * dummyarg)
 	__u16 netfid;
 	int rc;
 
-	daemonize("cifsoplockd");
-	allow_signal(SIGTERM);
-
-	oplockThread = current;
 	do {
 		if (try_to_freeze())
 			continue;
@@ -900,9 +894,9 @@ static int cifs_oplock_thread(void * dummyarg)
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(1);  /* yield in case q were corrupt */
 		}
-	} while(!signal_pending(current));
-	oplockThread = NULL;
-	complete_and_exit (&cifs_oplock_exited, 0);
+	} while (!kthread_should_stop());
+
+	return 0;
 }
 
 static int cifs_dnotify_thread(void * dummyarg)
@@ -910,10 +904,6 @@ static int cifs_dnotify_thread(void * dummyarg)
 	struct list_head *tmp;
 	struct cifsSesInfo *ses;
 
-	daemonize("cifsdnotifyd");
-	allow_signal(SIGTERM);
-
-	dnotifyThread = current;
 	do {
 		if(try_to_freeze())
 			continue;
@@ -931,8 +921,9 @@ static int cifs_dnotify_thread(void * dummyarg)
 				wake_up_all(&ses->server->response_q);
 		}
 		read_unlock(&GlobalSMBSeslock);
-	} while(!signal_pending(current));
-	complete_and_exit (&cifs_dnotify_exited, 0);
+	} while (!kthread_should_stop());
+
+	return 0;
 }
 
 static int __init
@@ -982,32 +973,48 @@ init_cifs(void)
 	}
 
 	rc = cifs_init_inodecache();
-	if (!rc) {
-		rc = cifs_init_mids();
-		if (!rc) {
-			rc = cifs_init_request_bufs();
-			if (!rc) {
-				rc = register_filesystem(&cifs_fs_type);
-				if (!rc) {
-					rc = (int)kernel_thread(cifs_oplock_thread, NULL,
-						CLONE_FS | CLONE_FILES | CLONE_VM);
-					if(rc > 0) {
-						rc = (int)kernel_thread(cifs_dnotify_thread, NULL,
-							CLONE_FS | CLONE_FILES | CLONE_VM);
-						if(rc > 0)
-							return 0;
-						else
-							cERROR(1,("error %d create dnotify thread", rc));
-					} else {
-						cERROR(1,("error %d create oplock thread",rc));
-					}
-				}
-				cifs_destroy_request_bufs();
-			}
-			cifs_destroy_mids();
-		}
-		cifs_destroy_inodecache();
-	}
+	if (rc)
+		goto out_clean_proc;
+
+	rc = cifs_init_mids();
+	if (rc)
+		goto out_destroy_inodecache;
+
+	rc = cifs_init_request_bufs();
+	if (rc)
+		goto out_destroy_mids;
+
+	rc = register_filesystem(&cifs_fs_type);
+	if (rc)
+		goto out_destroy_request_bufs;
+
+	oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd");
+	if (IS_ERR(oplockThread)) {
+		rc = PTR_ERR(oplockThread);
+		cERROR(1,("error %d create oplock thread", rc));
+		goto out_unregister_filesystem;
+	}
+
+	dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
+	if (IS_ERR(dnotifyThread)) {
+		rc = PTR_ERR(dnotifyThread);
+		cERROR(1,("error %d create dnotify thread", rc));
+		goto out_stop_oplock_thread;
+	}
+
+	return 0;
+
+ out_stop_oplock_thread:
+	kthread_stop(oplockThread);
+ out_unregister_filesystem:
+	unregister_filesystem(&cifs_fs_type);
+ out_destroy_request_bufs:
+	cifs_destroy_request_bufs();
+ out_destroy_mids:
+	cifs_destroy_mids();
+ out_destroy_inodecache:
+	cifs_destroy_inodecache();
+ out_clean_proc:
 #ifdef CONFIG_PROC_FS
 	cifs_proc_clean();
 #endif
@@ -1025,14 +1032,8 @@ exit_cifs(void)
 	cifs_destroy_inodecache();
 	cifs_destroy_mids();
 	cifs_destroy_request_bufs();
-	if(oplockThread) {
-		send_sig(SIGTERM, oplockThread, 1);
-		wait_for_completion(&cifs_oplock_exited);
-	}
-	if(dnotifyThread) {
-		send_sig(SIGTERM, dnotifyThread, 1);
-		wait_for_completion(&cifs_dnotify_exited);
-	}
+	kthread_stop(oplockThread);
+	kthread_stop(dnotifyThread);
 }
 
 MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
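
The cifsfs.c changes replace hand-rolled kernel_thread()/daemonize()/SIGTERM management with the kthread API: kthread_run() creates and starts the thread, the loop polls kthread_should_stop(), and kthread_stop() wakes the thread and waits for its return value. A minimal sketch of the same pattern (the function and variable names here are illustrative, not from the patch):

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/err.h>
    #include <linux/init.h>

    static struct task_struct *worker;      /* illustrative name */

    static int worker_fn(void *unused)
    {
            do {
                    /* ... do one unit of work ... */
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule_timeout(HZ);
            } while (!kthread_should_stop());
            return 0;                       /* collected by kthread_stop() */
    }

    static int __init start_worker(void)
    {
            worker = kthread_run(worker_fn, NULL, "mod_workerd");
            if (IS_ERR(worker))
                    return PTR_ERR(worker); /* no task exists on failure */
            return 0;
    }

    static void __exit stop_worker(void)
    {
            kthread_stop(worker);   /* wakes the thread, waits for its exit */
    }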
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d705500aa283..fd36892eda55 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3119,7 +3119,7 @@ findFirstRetry:
 		psrch_inf->endOfSearch = FALSE;
 
 		psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount);
-		psrch_inf->index_of_last_entry =
+		psrch_inf->index_of_last_entry = 2 /* skip . and .. */ +
 			psrch_inf->entries_in_buffer;
 		*pnetfid = parms->SearchHandle;
 	} else {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0b86d5ca9014..d2ec806a4f32 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3447,6 +3447,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 			pSesInfo->server->secMode,
 			pSesInfo->server->capabilities,
 			pSesInfo->server->timeZone));
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+		if(experimEnabled > 1)
+			rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */,
+					    &ntlmv2_flag, nls_info);
+		else
+#endif
 		if (extended_security
 				&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
 				&& (pSesInfo->server->secType == NTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1d0ca3eaaca5..82315edc77d7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -139,9 +139,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -316,9 +314,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if(full_path == NULL)
 		rc = -ENOMEM;
 	else if (pTcon->ses->capabilities & CAP_UNIX) {
@@ -440,6 +436,20 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 	cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
+	/*
+	 * Don't allow the separator character in a path component.
+	 * The VFS will not allow "/", but "\" is allowed by posix.
+	 */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+		int i;
+		for (i = 0; i < direntry->d_name.len; i++)
+			if (direntry->d_name.name[i] == '\\') {
+				cFYI(1, ("Invalid file name"));
+				FreeXid(xid);
+				return ERR_PTR(-EINVAL);
+			}
+	}
+
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index ec4dfe9bf5ef..633a93811328 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -86,9 +86,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		rc = -ENOMEM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5c497c529772..e152bf6afa60 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -203,9 +203,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		}
 	}
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -906,8 +904,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 			if (rc != 0)
 				break;
 		}
-		/* BB FIXME We can not sign across two buffers yet */
-		if((pTcon->ses->server->secMode &
+		if(experimEnabled || (pTcon->ses->server->secMode &
 			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) {
 			struct kvec iov[2];
 			unsigned int len;
@@ -923,13 +920,13 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 					   *poffset, &bytes_written,
 					   iov, 1, long_op);
 		} else
-			/* BB FIXME fixup indentation of line below */
-			rc = CIFSSMBWrite(xid, pTcon,
-				 open_file->netfid,
-				 min_t(const int, cifs_sb->wsize,
-				       write_size - total_written),
-				 *poffset, &bytes_written,
-				 write_data + total_written, NULL, long_op);
+			rc = CIFSSMBWrite(xid, pTcon,
+				 open_file->netfid,
+				 min_t(const int, cifs_sb->wsize,
+				       write_size - total_written),
+				 *poffset, &bytes_written,
+				 write_data + total_written,
+				 NULL, long_op);
 		}
 		if (rc || (bytes_written == 0)) {
 			if (total_written)
@@ -968,6 +965,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
 	struct cifsFileInfo *open_file;
 	int rc;
 
+	/* Having a null inode here (because mapping->host was set to zero by
+	the VFS or MM) should not happen but we had reports of on oops (due to
+	it being zero) during stress testcases so we need to check for it */
+
+	if(cifs_inode == NULL) {
+		cERROR(1,("Null inode passed to cifs_writeable_file"));
+		dump_stack();
+		return NULL;
+	}
+
 	read_lock(&GlobalSMBSeslock);
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (open_file->closePend)
@@ -1093,12 +1100,11 @@ static int cifs_writepages(struct address_space *mapping,
 	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
 		return generic_writepages(mapping, wbc);
 
-	/* BB FIXME we do not have code to sign across multiple buffers yet,
-	   so go to older writepage style write which we can sign if needed */
 	if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
 		if(cifs_sb->tcon->ses->server->secMode &
 				(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-			return generic_writepages(mapping, wbc);
+			if(!experimEnabled)
+				return generic_writepages(mapping, wbc);
 
 	/*
 	 * BB: Is this meaningful for a non-block-device file system?
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 957ddd1571c6..4093764ef461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -722,9 +722,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -807,9 +805,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -1141,9 +1137,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		rc = 0;
 	}
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 9562f5bba65c..2ec99f833142 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -48,10 +48,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
 /* No need to check for cross device links since server will do that
    BB note DFS case in future though (when we may have to check) */
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	fromName = build_path_from_dentry(old_file);
 	toName = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if((fromName == NULL) || (toName == NULL)) {
 		rc = -ENOMEM;
 		goto cifs_hl_exit;
@@ -103,9 +101,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 
 	xid = GetXid();
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 
 	if (!full_path)
 		goto out_no_free;
@@ -164,9 +160,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		FreeXid(xid);
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
index 78866f925747..115359cc7a32 100644
--- a/fs/cifs/ntlmssp.c
+++ b/fs/cifs/ntlmssp.c
@@ -121,6 +121,20 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
 	}
 
 
+	/* copy session key */
+
+	/* if Unicode, align strings to two byte boundary */
+
+	/* copy user name */ /* BB Do we need to special case null user name? */
+
+	/* copy domain name */
+
+	/* copy Linux version */
+
+	/* copy network operating system name */
+
+	/* update bcc and smb buffer length */
+
 /*	rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
 	/* SMB request buf freed in SendReceive2 */
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2f6e2825571e..b689c5035124 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -404,9 +404,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
 	if(pTcon == NULL)
 		return -EINVAL;
 
-	mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		return -ENOMEM;
@@ -592,6 +590,13 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	first_entry_in_buffer =
 		cifsFile->srch_inf.index_of_last_entry -
 			cifsFile->srch_inf.entries_in_buffer;
+
+	/* if first entry in buf is zero then is first buffer
+	in search response data which means it is likely . and ..
+	will be in this buffer, although some servers do not return
+	. and .. for the root of a drive and for those we need
+	to start two entries earlier */
+
 /*	dump_cifs_file_struct(file, "In fce ");*/
 	if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
 	     is_dir_changed(file)) ||
@@ -634,23 +639,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 		char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start +
 			smbCalcSize((struct smb_hdr *)
 				cifsFile->srch_inf.ntwrk_buf_start);
+
+		current_entry = cifsFile->srch_inf.srch_entries_start;
 		first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
 					- cifsFile->srch_inf.entries_in_buffer;
 		pos_in_buf = index_to_find - first_entry_in_buffer;
 		cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
-		current_entry = cifsFile->srch_inf.srch_entries_start;
 		for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
 			/* go entry by entry figuring out which is first */
-			/* if( . or ..)
-				skip */
-			rc = cifs_entry_is_dot(current_entry,cifsFile);
-			if(rc == 1) /* is . or .. so skip */ {
-				cFYI(1,("Entry is .")); /* BB removeme BB */
-				/* continue; */
-			} else if (rc == 2 ) {
-				cFYI(1,("Entry is ..")); /* BB removeme BB */
-				/* continue; */
-			}
 			current_entry = nxt_dir_entry(current_entry,end_of_smb);
 		}
 		if((current_entry == NULL) && (i < pos_in_buf)) {
@@ -770,6 +766,11 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	if(file->f_dentry == NULL)
 		return -ENOENT;
 
+	rc = cifs_entry_is_dot(pfindEntry,pCifsF);
+	/* skip . and .. since we added them first */
+	if(rc != 0)
+		return 0;
+
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 
 	qstring.name = scratch_buf;
@@ -898,22 +899,22 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 
 	switch ((int) file->f_pos) {
 	case 0:
-	/*if (filldir(direntry, ".", 1, file->f_pos,
+		if (filldir(direntry, ".", 1, file->f_pos,
 		     file->f_dentry->d_inode->i_ino, DT_DIR) < 0) {
-			cERROR(1, ("Filldir for current dir failed "));
+			cERROR(1, ("Filldir for current dir failed"));
 			rc = -ENOMEM;
 			break;
 		}
-		file->f_pos++; */
+		file->f_pos++;
 	case 1:
-	/* if (filldir(direntry, "..", 2, file->f_pos,
+		if (filldir(direntry, "..", 2, file->f_pos,
 		     file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
 			cERROR(1, ("Filldir for parent dir failed "));
 			rc = -ENOMEM;
 			break;
 		}
-		file->f_pos++; */
-	case 2:
+		file->f_pos++;
+	default:
 		/* 1) If search is active,
 			is in current search buffer?
 			if it before then restart search
@@ -927,7 +928,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 				return rc;
 			}
 		}
-	default:
 		if(file->private_data == NULL) {
 			rc = -EINVAL;
 			FreeXid(xid);
@@ -947,8 +947,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		kfree(cifsFile->search_resume_name);
 		cifsFile->search_resume_name = NULL; */
 
-		/* BB account for . and .. in f_pos as special case */
-
 		rc = find_cifs_entry(xid,pTcon, file,
 				&current_entry,&num_to_fill);
 		if(rc) {
@@ -977,7 +975,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 					 num_to_fill, i));
 				break;
 			}
-
+			/* if buggy server returns . and .. late do
+			we want to check for that here? */
 			rc = cifs_filldir(current_entry, file,
 					filldir, direntry,tmp_buf);
 			file->f_pos++;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3938444d87b2..7754d641775e 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -62,9 +62,7 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name)
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -116,9 +114,7 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name,
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -223,9 +219,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -341,9 +335,7 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size)
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/compat.c b/fs/compat.c
index 7f8e26ea427c..3f3e8f4d43d6 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	if (ret < 0)
 		goto out;
 
+	ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
+	if (ret)
+		goto out;
+
 	fnv = NULL;
 	if (type == READ) {
 		fn = file->f_op->read;
@@ -1313,6 +1317,26 @@ out:
 	return ret;
 }
 
+asmlinkage long
+compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
+		    unsigned int nr_segs, unsigned int flags)
+{
+	unsigned i;
+	struct iovec *iov;
+	if (nr_segs >= UIO_MAXIOV)
+		return -EINVAL;
+	iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec));
+	for (i = 0; i < nr_segs; i++) {
+		struct compat_iovec v;
+		if (get_user(v.iov_base, &iov32[i].iov_base) ||
+		    get_user(v.iov_len, &iov32[i].iov_len) ||
+		    put_user(compat_ptr(v.iov_base), &iov[i].iov_base) ||
+		    put_user(v.iov_len, &iov[i].iov_len))
+			return -EFAULT;
+	}
+	return sys_vmsplice(fd, iov, nr_segs, flags);
+}
+
 /*
  * Exactly like fs/open.c:sys_open(), except that it doesn't set the
  * O_LARGEFILE flag.
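
compat_sys_vmsplice() rewrites the 32-bit iovec array into native form on the user stack (via compat_alloc_user_space) and forwards to sys_vmsplice(). From user space the call looks like this minimal sketch (the buffer contents and sizes are arbitrary placeholders):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
            int pipefd[2];
            static char buf[] = "hello from vmsplice\n";
            struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) - 1 };
            ssize_t n;

            if (pipe(pipefd) < 0)
                    return 1;
            /* map the user pages backing buf into the pipe */
            n = vmsplice(pipefd[1], &iov, 1, 0);
            if (n < 0)
                    perror("vmsplice");
            return 0;
    }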
diff --git a/fs/exec.c b/fs/exec.c
index 4121bb559739..3a79d97ac234 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -712,7 +712,7 @@ static int de_thread(struct task_struct *tsk)
 	attach_pid(current, PIDTYPE_PID,  current->pid);
 	attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
 	attach_pid(current, PIDTYPE_SID,  current->signal->session);
-	list_add_tail(&current->tasks, &init_task.tasks);
+	list_add_tail_rcu(&current->tasks, &init_task.tasks);
 
 	current->group_leader = current;
 	leader->group_leader = current;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index aaf1da17b6d4..8c22aa9a7fbb 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		if (!S_ISDIR(inode->i_mode))
 			flags &= ~EXT3_DIRSYNC_FL;
 
+		mutex_lock(&inode->i_mutex);
 		oldflags = ei->i_flags;
 
 		/* The JOURNAL_DATA flag is modifiable only by root */
@@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * This test looks nicer. Thanks to Pauline Middelink
 		 */
 		if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
+			}
 		}
 
 		/*
@@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * the relevant capability.
 		 */
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
+			if (!capable(CAP_SYS_RESOURCE)) {
+				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
+			}
 		}
 
 
 		handle = ext3_journal_start(inode, 1);
-		if (IS_ERR(handle))
+		if (IS_ERR(handle)) {
+			mutex_unlock(&inode->i_mutex);
 			return PTR_ERR(handle);
+		}
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
 		err = ext3_reserve_inode_write(handle, inode, &iloc);
@@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
 flags_err:
 		ext3_journal_stop(handle);
-		if (err)
+		if (err) {
+			mutex_unlock(&inode->i_mutex);
 			return err;
+		}
 
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
 			err = ext3_change_inode_journal_flag(inode, jflag);
+		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
 	case EXT3_IOC_GETVERSION:
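
The ext3 fix takes inode->i_mutex for the whole flag update and therefore has to drop it on every early return, which is easy to get wrong as new returns are added. A common alternative, shown here only as a sketch with illustrative names (it is not what the patch does), funnels all exits through a single unlock label:

    #include <linux/fs.h>
    #include <linux/capability.h>
    #include <linux/mutex.h>

    /* sketch: single-exit locking so no early return can leak the mutex */
    static int set_flags_locked(struct inode *inode, unsigned int flags)
    {
            int err = 0;

            mutex_lock(&inode->i_mutex);
            if (!capable(CAP_LINUX_IMMUTABLE)) {
                    err = -EPERM;
                    goto out_unlock;
            }
            /* ... start the journal, update flags, mark the inode dirty,
               setting err on failure ... */
    out_unlock:
            mutex_unlock(&inode->i_mutex);
            return err;
    }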
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 14f5f6ea3e72..8aac5334680d 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 			goto exit_bh;
 		}
 		lock_buffer(bh);
-		memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size);
+		memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
 		set_buffer_uptodate(gdb);
 		unlock_buffer(bh);
 		ext3_journal_dirty_metadata(handle, gdb);
@@ -767,6 +767,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __FUNCTION__,
 			     "multiple resizers run on filesystem!");
+		unlock_super(sb);
 		err = -EBUSY;
 		goto exit_journal;
 	}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cc750c68fe70..104a62dadb94 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -128,14 +128,24 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 	}
 }
 
-void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req)
+/*
+ * Called with sbput_sem held for read (request_end) or write
+ * (fuse_put_super).  By the time fuse_put_super() is finished, all
+ * inodes belonging to background requests must be released, so the
+ * iputs have to be done within the locked region.
+ */
+void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
 {
-	list_del_init(&req->bg_entry);
+	iput(req->inode);
+	iput(req->inode2);
+	spin_lock(&fc->lock);
+	list_del(&req->bg_entry);
 	if (fc->num_background == FUSE_MAX_BACKGROUND) {
 		fc->blocked = 0;
 		wake_up_all(&fc->blocked_waitq);
 	}
 	fc->num_background--;
+	spin_unlock(&fc->lock);
 }
 
 /*
@@ -165,27 +175,22 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 		wake_up(&req->waitq);
 		fuse_put_request(fc, req);
 	} else {
-		struct inode *inode = req->inode;
-		struct inode *inode2 = req->inode2;
-		struct file *file = req->file;
 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 		req->end = NULL;
-		req->inode = NULL;
-		req->inode2 = NULL;
-		req->file = NULL;
-		if (!list_empty(&req->bg_entry))
-			fuse_remove_background(fc, req);
 		spin_unlock(&fc->lock);
+		down_read(&fc->sbput_sem);
+		if (fc->mounted)
+			fuse_release_background(fc, req);
+		up_read(&fc->sbput_sem);
+
+		/* fput must go outside sbput_sem, otherwise it can deadlock */
+		if (req->file)
+			fput(req->file);
 
 		if (end)
 			end(fc, req);
 		else
 			fuse_put_request(fc, req);
-
-		if (file)
-			fput(file);
-		iput(inode);
-		iput(inode2);
 	}
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 59661c481d9d..0474202cb5dc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -258,9 +258,15 @@ struct fuse_conn {
 	/** waitq for blocked connection */
 	wait_queue_head_t blocked_waitq;
 
+	/** RW semaphore for exclusion with fuse_put_super() */
+	struct rw_semaphore sbput_sem;
+
 	/** The next unique request id */
 	u64 reqctr;
 
+	/** Mount is active */
+	unsigned mounted;
+
 	/** Connection established, cleared on umount, connection
 	    abort and device release */
 	unsigned connected;
@@ -471,11 +477,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
 void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
 
 /**
- * Remove request from the the background list
+ * Release inodes and file associated with background request
  */
-void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
 
-/** Abort all requests */
+/* Abort all requests */
 void fuse_abort_conn(struct fuse_conn *fc);
 
 /**
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 43a6fc0db8a7..7627022446b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -204,26 +204,17 @@ static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
+	down_write(&fc->sbput_sem);
+	while (!list_empty(&fc->background))
+		fuse_release_background(fc,
+					list_entry(fc->background.next,
+						   struct fuse_req, bg_entry));
+
 	spin_lock(&fc->lock);
+	fc->mounted = 0;
 	fc->connected = 0;
-	while (!list_empty(&fc->background)) {
-		struct fuse_req *req = list_entry(fc->background.next,
-						  struct fuse_req, bg_entry);
-		struct inode *inode = req->inode;
-		struct inode *inode2 = req->inode2;
-
-		/* File would hold a reference to vfsmount */
-		BUG_ON(req->file);
-		req->inode = NULL;
-		req->inode2 = NULL;
-		fuse_remove_background(fc, req);
-
-		spin_unlock(&fc->lock);
-		iput(inode);
-		iput(inode2);
-		spin_lock(&fc->lock);
-	}
 	spin_unlock(&fc->lock);
+	up_write(&fc->sbput_sem);
 	/* Flush all readers on this fs */
 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 	wake_up_all(&fc->waitq);
@@ -395,6 +386,7 @@ static struct fuse_conn *new_conn(void)
 	INIT_LIST_HEAD(&fc->processing);
 	INIT_LIST_HEAD(&fc->io);
 	INIT_LIST_HEAD(&fc->background);
+	init_rwsem(&fc->sbput_sem);
 	kobj_set_kset_s(fc, connections_subsys);
 	kobject_init(&fc->kobj);
 	atomic_set(&fc->num_waiting, 0);
@@ -508,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (file->f_op != &fuse_dev_operations)
 		return -EINVAL;
 
-	/* Setting file->private_data can't race with other mount()
-	   instances, since BKL is held for ->get_sb() */
-	if (file->private_data)
-		return -EINVAL;
-
 	fc = new_conn();
 	if (!fc)
 		return -ENOMEM;
@@ -548,7 +535,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto err_free_req;
 
+	/* Setting file->private_data can't race with other mount()
+	   instances, since BKL is held for ->get_sb() */
+	err = -EINVAL;
+	if (file->private_data)
+		goto err_kobject_del;
+
 	sb->s_root = root_dentry;
+	fc->mounted = 1;
 	fc->connected = 1;
 	kobject_get(&fc->kobj);
 	file->private_data = fc;
@@ -563,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	return 0;
 
+ err_kobject_del:
+	kobject_del(&fc->kobj);
  err_free_req:
 	fuse_request_free(init_req);
  err_put_root:
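
Taken together, the fuse changes use fc->sbput_sem as a reader/writer barrier: request_end() holds it for read while releasing a background request's inodes, and fuse_put_super() holds it for write while draining the background list and clearing fc->mounted, so no iput() can race past umount. A minimal sketch of that exclusion pattern (all names here are illustrative stand-ins):

    #include <linux/rwsem.h>

    static DECLARE_RWSEM(teardown_sem); /* plays the role of fc->sbput_sem */
    static int mounted;                 /* plays the role of fc->mounted */

    /* many request completions may run concurrently as readers */
    static void complete_request(void)
    {
            down_read(&teardown_sem);
            if (mounted) {
                    /* safe to release per-request resources here */
            }
            up_read(&teardown_sem);
    }

    /* umount runs as the exclusive writer against all completions */
    static void put_super(void)
    {
            down_write(&teardown_sem);
            /* drain remaining background requests, then: */
            mounted = 0;
            up_write(&teardown_sem);
    }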
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d2b66bad7d50..3ef739120dff 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -650,7 +650,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	svc_wake_up(block->b_daemon);
 }
 
-void nlmsvc_grant_release(void *data)
+static void nlmsvc_grant_release(void *data)
 {
 	struct nlm_rqst *call = data;
 
diff --git a/fs/locks.c b/fs/locks.c
index dda83d6cd48b..efad798824dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2230,7 +2230,12 @@ void steal_locks(fl_owner_t from)
 
 	lock_kernel();
 	j = 0;
-	rcu_read_lock();
+
+	/*
+	 * We are not taking a ref to the file structures, so
+	 * we need to acquire ->file_lock.
+	 */
+	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	for (;;) {
 		unsigned long set;
@@ -2248,7 +2253,7 @@ void steal_locks(fl_owner_t from)
 			set >>= 1;
 		}
 	}
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	unlock_kernel();
 }
 EXPORT_SYMBOL(steal_locks);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a23f34894167..cae74dd4c7f5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -128,15 +128,14 @@ struct inode_operations nfs4_dir_inode_operations = {
 static int
 nfs_opendir(struct inode *inode, struct file *filp)
 {
-	int res = 0;
+	int res;
 
 	dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
 			inode->i_sb->s_id, inode->i_ino);
 
 	lock_kernel();
 	/* Call generic open code in order to cache credentials */
-	if (!res)
-		res = nfs_open(inode, filp);
+	res = nfs_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0f583cb16ddb..3c72b0c07283 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -112,10 +112,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
  */
 ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
 {
-	struct dentry *dentry = iocb->ki_filp->f_dentry;
-
 	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
-			dentry->d_name.name, (long long) pos, nr_segs);
+			iocb->ki_filp->f_dentry->d_name.name,
+			(long long) pos, nr_segs);
 
 	return -EINVAL;
 }
@@ -468,7 +467,6 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_write_data *data = dreq->commit_data;
-	struct rpc_task *task = &data->task;
 
 	data->inode = dreq->inode;
 	data->cred = dreq->ctx->cred;
@@ -489,7 +487,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
 	dreq->commit_data = NULL;
 
-	dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
+	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
 	lock_kernel();
 	rpc_execute(&data->task);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f1df2c8d9259..fade02c15e6e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -534,10 +534,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
  */
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct inode * inode = filp->f_mapping->host;
-
 	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
-			inode->i_sb->s_id, inode->i_ino,
+			filp->f_dentry->d_inode->i_sb->s_id,
+			filp->f_dentry->d_inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
 	/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2f7656b911b6..d0b991a92327 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -700,12 +700,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	/*
 	 * Display superblock I/O counters
 	 */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_cpu(cpu) {
 		struct nfs_iostats *stats;
 
-		if (!cpu_possible(cpu))
-			continue;
-
 		preempt_disable();
 		stats = per_cpu_ptr(nfss->io_stats, cpu);
 
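
for_each_possible_cpu() folds the cpu_possible() test into the iterator itself, so the loop body no longer needs the explicit skip. The same idiom for summing a per-CPU counter, as a sketch (the counter name is illustrative):

    #include <linux/percpu.h>
    #include <linux/cpumask.h>

    static DEFINE_PER_CPU(unsigned long, hits);     /* illustrative counter */

    static unsigned long total_hits(void)
    {
            unsigned long sum = 0;
            int cpu;

            /* visits every possible CPU, including ones currently offline */
            for_each_possible_cpu(cpu)
                    sum += per_cpu(hits, cpu);
            return sum;
    }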
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47ece1dd3c67..d86c0db7b1e8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1218,7 +1218,7 @@ out:
 	return status;
 }
 
-static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
 {
 	struct file *filp;
 
@@ -1227,8 +1227,10 @@ static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, st
 		struct nfs_open_context *ctx;
 		ctx = (struct nfs_open_context *)filp->private_data;
 		ctx->state = state;
-	} else
-		nfs4_close_state(state, nd->intent.open.flags);
+		return 0;
+	}
+	nfs4_close_state(state, nd->intent.open.flags);
+	return PTR_ERR(filp);
 }
 
 struct dentry *
@@ -1835,7 +1837,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		nfs_setattr_update_inode(state->inode, sattr);
 	}
 	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
-		nfs4_intent_set_file(nd, dentry, state);
+		status = nfs4_intent_set_file(nd, dentry, state);
 	else
 		nfs4_close_state(state, flags);
 out:
diff --git a/fs/open.c b/fs/open.c
index c32c89d6d8db..53ec28c36777 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -331,7 +331,10 @@ out:
 
 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
 {
-	return do_sys_ftruncate(fd, length, 1);
+	long ret = do_sys_ftruncate(fd, length, 1);
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 /* LFS versions of truncate are only needed on 32 bit machines */
@@ -343,7 +346,10 @@ asmlinkage long sys_truncate64(const char __user * path, loff_t length)
 
 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
 {
-	return do_sys_ftruncate(fd, length, 0);
+	long ret = do_sys_ftruncate(fd, length, 0);
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 #endif
 
349 355
@@ -1093,20 +1099,30 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1093 1099
1094asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1100asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1095{ 1101{
1102 long ret;
1103
1096 if (force_o_largefile()) 1104 if (force_o_largefile())
1097 flags |= O_LARGEFILE; 1105 flags |= O_LARGEFILE;
1098 1106
1099 return do_sys_open(AT_FDCWD, filename, flags, mode); 1107 ret = do_sys_open(AT_FDCWD, filename, flags, mode);
1108 /* avoid REGPARM breakage on x86: */
1109 prevent_tail_call(ret);
1110 return ret;
1100} 1111}
1101EXPORT_SYMBOL_GPL(sys_open); 1112EXPORT_SYMBOL_GPL(sys_open);
1102 1113
1103asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1114asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
1104 int mode) 1115 int mode)
1105{ 1116{
1117 long ret;
1118
1106 if (force_o_largefile()) 1119 if (force_o_largefile())
1107 flags |= O_LARGEFILE; 1120 flags |= O_LARGEFILE;
1108 1121
1109 return do_sys_open(dfd, filename, flags, mode); 1122 ret = do_sys_open(dfd, filename, flags, mode);
1123 /* avoid REGPARM breakage on x86: */
1124 prevent_tail_call(ret);
1125 return ret;
1110} 1126}
1111EXPORT_SYMBOL_GPL(sys_openat); 1127EXPORT_SYMBOL_GPL(sys_openat);
1112 1128
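
The prevent_tail_call() additions keep gcc from turning the do_sys_open() call into a tail call; with the i386 REGPARM calling convention a tail call would let the callee clobber syscall arguments still living in the caller's stack slots. The macro is essentially an optimization barrier on the return value. A sketch of the idea follows; the exact in-kernel definition may differ, and do_work() is an illustrative stand-in:

    /* force ret through a register so the preceding call cannot become
       a tail call (sketch; the real kernel macro may differ) */
    #define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret))

    extern long do_work(unsigned int fd);       /* illustrative */

    long sys_example(unsigned int fd)
    {
            long ret = do_work(fd);
            prevent_tail_call(ret);     /* do_work can't be tail-called now */
            return ret;
    }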
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f3b6af071722..45ae7dd3c650 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -372,6 +372,7 @@ static char *make_block_name(struct gendisk *disk)
 	char *name;
 	static char *block_str = "block:";
 	int size;
+	char *s;
 
 	size = strlen(block_str) + strlen(disk->disk_name) + 1;
 	name = kmalloc(size, GFP_KERNEL);
@@ -379,6 +380,10 @@ static char *make_block_name(struct gendisk *disk)
 		return NULL;
 	strcpy(name, block_str);
 	strcat(name, disk->disk_name);
+	/* ewww... some of these buggers have / in name... */
+	s = strchr(name, '/');
+	if (s)
+		*s = '!';
 	return name;
 }
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 7fefb10db8d9..5acd8954aaa0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe)
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
+pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
+			int atomic)
 {
 	unsigned long copy;
 
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 			iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_from_user(to, iov->iov_base, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+				return -EFAULT;
+		} else {
+			if (copy_from_user(to, iov->iov_base, copy))
+				return -EFAULT;
+		}
 		to += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
75} 81}
76 82
77static int 83static int
78pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) 84pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
85 int atomic)
79{ 86{
80 unsigned long copy; 87 unsigned long copy;
81 88
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
84 iov++; 91 iov++;
85 copy = min_t(unsigned long, len, iov->iov_len); 92 copy = min_t(unsigned long, len, iov->iov_len);
86 93
87 if (copy_to_user(iov->iov_base, from, copy)) 94 if (atomic) {
88 return -EFAULT; 95 if (__copy_to_user_inatomic(iov->iov_base, from, copy))
96 return -EFAULT;
97 } else {
98 if (copy_to_user(iov->iov_base, from, copy))
99 return -EFAULT;
100 }
89 from += copy; 101 from += copy;
90 len -= copy; 102 len -= copy;
91 iov->iov_base += copy; 103 iov->iov_base += copy;
@@ -94,13 +106,52 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
94 return 0; 106 return 0;
95} 107}
96 108
109/*
110 * Attempt to pre-fault in the user memory, so we can use atomic copies.
111 * Returns the number of bytes not faulted in.
112 */
113static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
114{
115 while (!iov->iov_len)
116 iov++;
117
118 while (len > 0) {
119 unsigned long this_len;
120
121 this_len = min_t(unsigned long, len, iov->iov_len);
122 if (fault_in_pages_writeable(iov->iov_base, this_len))
123 break;
124
125 len -= this_len;
126 iov++;
127 }
128
129 return len;
130}
131
132/*
133 * Pre-fault in the user memory, so we can use atomic copies.
134 */
135static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
136{
137 while (!iov->iov_len)
138 iov++;
139
140 while (len > 0) {
141 unsigned long this_len;
142
143 this_len = min_t(unsigned long, len, iov->iov_len);
144 fault_in_pages_readable(iov->iov_base, this_len);
145 len -= this_len;
146 iov++;
147 }
148}
149
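The two fault-in helpers above pair with the new 'atomic' argument to form a fast path and a slow path: pre-fault the user pages, attempt the copy under kmap_atomic() where a sleeping fault is forbidden, and fall back to the sleeping kmap()/copy_from_user() pair only when the atomic copy faults anyway. A condensed sketch of the pattern, using a flat user buffer instead of an iovec so the retry needs no iovec rewind (fill_page_from_user() is illustrative, not from this diff):

	static int fill_page_from_user(struct page *page,
				       const char __user *ubuf, size_t len)
	{
		char *dst;
		unsigned long left;

		/* fast path: pre-fault, then copy with pagefaults disabled */
		fault_in_pages_readable(ubuf, len);
		dst = kmap_atomic(page, KM_USER0);
		left = __copy_from_user_inatomic(dst, ubuf, len);
		kunmap_atomic(dst, KM_USER0);
		if (!left)
			return 0;

		/* slow path: kmap() and copy_from_user() may both sleep */
		dst = kmap(page);
		left = copy_from_user(dst, ubuf, len);
		kunmap(page);

		return left ? -EFAULT : 0;
	}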
97static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 150static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf) 151 struct pipe_buffer *buf)
99{ 152{
100 struct page *page = buf->page; 153 struct page *page = buf->page;
101 154
102 buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
103
104 /* 155 /*
105 * If nobody else uses this page, and we don't already have a 156 * If nobody else uses this page, and we don't already have a
106 * temporary page, let's keep track of it as a one-deep 157 * temporary page, let's keep track of it as a one-deep
@@ -112,38 +163,58 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
112 page_cache_release(page); 163 page_cache_release(page);
113} 164}
114 165
115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe, 166void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf) 167 struct pipe_buffer *buf, int atomic)
117{ 168{
169 if (atomic) {
170 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
171 return kmap_atomic(buf->page, KM_USER0);
172 }
173
118 return kmap(buf->page); 174 return kmap(buf->page);
119} 175}
120 176
121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, 177void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf) 178 struct pipe_buffer *buf, void *map_data)
123{ 179{
124 kunmap(buf->page); 180 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
181 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
182 kunmap_atomic(map_data, KM_USER0);
183 } else
184 kunmap(buf->page);
125} 185}
126 186
127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, 187int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
128 struct pipe_buffer *buf) 188 struct pipe_buffer *buf)
129{ 189{
130 buf->flags |= PIPE_BUF_FLAG_STOLEN; 190 struct page *page = buf->page;
131 return 0; 191
192 if (page_count(page) == 1) {
193 lock_page(page);
194 return 0;
195 }
196
197 return 1;
132} 198}
133 199
134static void anon_pipe_buf_get(struct pipe_inode_info *info, 200void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
135 struct pipe_buffer *buf)
136{ 201{
137 page_cache_get(buf->page); 202 page_cache_get(buf->page);
138} 203}
139 204
205int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
206{
207 return 0;
208}
209
140static struct pipe_buf_operations anon_pipe_buf_ops = { 210static struct pipe_buf_operations anon_pipe_buf_ops = {
141 .can_merge = 1, 211 .can_merge = 1,
142 .map = anon_pipe_buf_map, 212 .map = generic_pipe_buf_map,
143 .unmap = anon_pipe_buf_unmap, 213 .unmap = generic_pipe_buf_unmap,
214 .pin = generic_pipe_buf_pin,
144 .release = anon_pipe_buf_release, 215 .release = anon_pipe_buf_release,
145 .steal = anon_pipe_buf_steal, 216 .steal = generic_pipe_buf_steal,
146 .get = anon_pipe_buf_get, 217 .get = generic_pipe_buf_get,
147}; 218};
148 219
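With the anon ops now pointing at generic helpers, every pipe buffer type converges on the same operations table. Reconstructed from the call sites in this diff (so the exact prototypes are inferred, not quoted from pipe_fs_i.h), the shape is roughly:

	struct pipe_buf_operations {
		int can_merge;
		/* bracket the data copy; 'atomic' selects kmap_atomic()
		 * versus the sleeping kmap() */
		void *(*map)(struct pipe_inode_info *,
			     struct pipe_buffer *, int atomic);
		void (*unmap)(struct pipe_inode_info *,
			      struct pipe_buffer *, void *map_data);
		/* make the page uptodate before any copy; may sleep */
		int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
		void (*release)(struct pipe_inode_info *,
				struct pipe_buffer *);
		/* 0: caller owns the now-locked page; nonzero: must copy */
		int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
		void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
	};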
149static ssize_t 220static ssize_t
@@ -174,22 +245,33 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
174 struct pipe_buf_operations *ops = buf->ops; 245 struct pipe_buf_operations *ops = buf->ops;
175 void *addr; 246 void *addr;
176 size_t chars = buf->len; 247 size_t chars = buf->len;
177 int error; 248 int error, atomic;
178 249
179 if (chars > total_len) 250 if (chars > total_len)
180 chars = total_len; 251 chars = total_len;
181 252
182 addr = ops->map(filp, pipe, buf); 253 error = ops->pin(pipe, buf);
183 if (IS_ERR(addr)) { 254 if (error) {
184 if (!ret) 255 if (!ret)
185 ret = PTR_ERR(addr); 256 ret = error;
186 break; 257 break;
187 } 258 }
188 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 259
189 ops->unmap(pipe, buf); 260 atomic = !iov_fault_in_pages_write(iov, chars);
261redo:
262 addr = ops->map(pipe, buf, atomic);
263 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
264 ops->unmap(pipe, buf, addr);
190 if (unlikely(error)) { 265 if (unlikely(error)) {
266 /*
267 * Just retry with the slow path if we failed.
268 */
269 if (atomic) {
270 atomic = 0;
271 goto redo;
272 }
191 if (!ret) 273 if (!ret)
192 ret = -EFAULT; 274 ret = error;
193 break; 275 break;
194 } 276 }
195 ret += chars; 277 ret += chars;
@@ -293,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
293 int offset = buf->offset + buf->len; 375 int offset = buf->offset + buf->len;
294 376
295 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 377 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
378 int error, atomic = 1;
296 void *addr; 379 void *addr;
297 int error;
298 380
299 addr = ops->map(filp, pipe, buf); 381 error = ops->pin(pipe, buf);
300 if (IS_ERR(addr)) { 382 if (error)
301 error = PTR_ERR(addr);
302 goto out; 383 goto out;
303 } 384
385 iov_fault_in_pages_read(iov, chars);
386redo1:
387 addr = ops->map(pipe, buf, atomic);
304 error = pipe_iov_copy_from_user(offset + addr, iov, 388 error = pipe_iov_copy_from_user(offset + addr, iov,
305 chars); 389 chars, atomic);
306 ops->unmap(pipe, buf); 390 ops->unmap(pipe, buf, addr);
307 ret = error; 391 ret = error;
308 do_wakeup = 1; 392 do_wakeup = 1;
309 if (error) 393 if (error) {
394 if (atomic) {
395 atomic = 0;
396 goto redo1;
397 }
310 goto out; 398 goto out;
399 }
311 buf->len += chars; 400 buf->len += chars;
312 total_len -= chars; 401 total_len -= chars;
313 ret = chars; 402 ret = chars;
@@ -330,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
330 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 419 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
331 struct pipe_buffer *buf = pipe->bufs + newbuf; 420 struct pipe_buffer *buf = pipe->bufs + newbuf;
332 struct page *page = pipe->tmp_page; 421 struct page *page = pipe->tmp_page;
333 int error; 422 char *src;
423 int error, atomic = 1;
334 424
335 if (!page) { 425 if (!page) {
336 page = alloc_page(GFP_HIGHUSER); 426 page = alloc_page(GFP_HIGHUSER);
@@ -350,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
350 if (chars > total_len) 440 if (chars > total_len)
351 chars = total_len; 441 chars = total_len;
352 442
353 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 443 iov_fault_in_pages_read(iov, chars);
354 kunmap(page); 444redo2:
445 if (atomic)
446 src = kmap_atomic(page, KM_USER0);
447 else
448 src = kmap(page);
449
450 error = pipe_iov_copy_from_user(src, iov, chars,
451 atomic);
452 if (atomic)
453 kunmap_atomic(src, KM_USER0);
454 else
455 kunmap(page);
456
355 if (unlikely(error)) { 457 if (unlikely(error)) {
458 if (atomic) {
459 atomic = 0;
460 goto redo2;
461 }
356 if (!ret) 462 if (!ret)
357 ret = -EFAULT; 463 ret = error;
358 break; 464 break;
359 } 465 }
360 ret += chars; 466 ret += chars;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3a3eecef689..6cc77dc3f3ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -297,16 +297,20 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
297 297
298 files = get_files_struct(task); 298 files = get_files_struct(task);
299 if (files) { 299 if (files) {
300 rcu_read_lock(); 300 /*
301 * We are not taking a ref to the file structure, so we must
302 * hold ->file_lock.
303 */
304 spin_lock(&files->file_lock);
301 file = fcheck_files(files, fd); 305 file = fcheck_files(files, fd);
302 if (file) { 306 if (file) {
303 *mnt = mntget(file->f_vfsmnt); 307 *mnt = mntget(file->f_vfsmnt);
304 *dentry = dget(file->f_dentry); 308 *dentry = dget(file->f_dentry);
305 rcu_read_unlock(); 309 spin_unlock(&files->file_lock);
306 put_files_struct(files); 310 put_files_struct(files);
307 return 0; 311 return 0;
308 } 312 }
309 rcu_read_unlock(); 313 spin_unlock(&files->file_lock);
310 put_files_struct(files); 314 put_files_struct(files);
311 } 315 }
312 return -ENOENT; 316 return -ENOENT;
@@ -1523,7 +1527,12 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1523 if (!files) 1527 if (!files)
1524 goto out_unlock; 1528 goto out_unlock;
1525 inode->i_mode = S_IFLNK; 1529 inode->i_mode = S_IFLNK;
1526 rcu_read_lock(); 1530
1531 /*
1532 * We are not taking a ref to the file structure, so we must
1533 * hold ->file_lock.
1534 */
1535 spin_lock(&files->file_lock);
1527 file = fcheck_files(files, fd); 1536 file = fcheck_files(files, fd);
1528 if (!file) 1537 if (!file)
1529 goto out_unlock2; 1538 goto out_unlock2;
@@ -1531,7 +1540,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1531 inode->i_mode |= S_IRUSR | S_IXUSR; 1540 inode->i_mode |= S_IRUSR | S_IXUSR;
1532 if (file->f_mode & 2) 1541 if (file->f_mode & 2)
1533 inode->i_mode |= S_IWUSR | S_IXUSR; 1542 inode->i_mode |= S_IWUSR | S_IXUSR;
1534 rcu_read_unlock(); 1543 spin_unlock(&files->file_lock);
1535 put_files_struct(files); 1544 put_files_struct(files);
1536 inode->i_op = &proc_pid_link_inode_operations; 1545 inode->i_op = &proc_pid_link_inode_operations;
1537 inode->i_size = 64; 1546 inode->i_size = 64;
@@ -1541,7 +1550,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1541 return NULL; 1550 return NULL;
1542 1551
1543out_unlock2: 1552out_unlock2:
1544 rcu_read_unlock(); 1553 spin_unlock(&files->file_lock);
1545 put_files_struct(files); 1554 put_files_struct(files);
1546out_unlock: 1555out_unlock:
1547 iput(inode); 1556 iput(inode);
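Both proc hunks encode the same locking rule: a struct file returned by fcheck_files() is only safe to touch for as long as the fd table cannot change, and since no reference is taken on the file, that now means holding files->file_lock across the whole access. The pattern, reduced to a sketch (use_file() stands in for whatever the caller does with the result):

	struct files_struct *files = get_files_struct(task);

	if (files) {
		struct file *file;

		/*
		 * No reference is taken on 'file', so the table must not
		 * change under us: spinlock, not rcu_read_lock().
		 */
		spin_lock(&files->file_lock);
		file = fcheck_files(files, fd);
		if (file)
			use_file(file);
		spin_unlock(&files->file_lock);
		put_files_struct(files);
	}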
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 58c418fbca2c..97ae1b92bc47 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct inode *inode)
408 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); 408 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
409 reiserfs_read_unlock_xattrs(inode->i_sb); 409 reiserfs_read_unlock_xattrs(inode->i_sb);
410 reiserfs_read_unlock_xattr_i(inode); 410 reiserfs_read_unlock_xattr_i(inode);
411 ret = acl ? 1 : 0; 411 ret = (acl && !IS_ERR(acl));
412 posix_acl_release(acl); 412 if (ret)
413 posix_acl_release(acl);
413 } 414 }
414 415
415 return ret; 416 return ret;
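The reiserfs fix hinges on reiserfs_get_acl() having three distinct outcomes, only one of which may be handed to posix_acl_release(). In sketch form:

	struct posix_acl *acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);

	if (acl && !IS_ERR(acl)) {
		/* a real ACL: count it and drop the reference */
		ret = 1;
		posix_acl_release(acl);
	} else {
		/* NULL (no ACL) or ERR_PTR(-errno): nothing to release */
		ret = 0;
	}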
diff --git a/fs/splice.c b/fs/splice.c
index 8d57e89924a6..7fb04970c72d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,15 +27,22 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h>
31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
30 36
31/* 37/*
32 * Passed to the actors 38 * Passed to splice_to_pipe
33 */ 39 */
34struct splice_desc { 40struct splice_pipe_desc {
35 unsigned int len, total_len; /* current and remaining length */ 41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
36 unsigned int flags; /* splice flags */ 44 unsigned int flags; /* splice flags */
37 struct file *file; /* file to read/write */ 45 struct pipe_buf_operations *ops;/* ops associated with output pipe */
38 loff_t pos; /* file position */
39}; 46};
40 47
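A splice producer now describes a whole batch of possibly non-contiguous page ranges in one splice_pipe_desc and hands it to splice_to_pipe() in a single call. Filling one in might look like the following, condensed from __generic_file_splice_read() later in this file (the single hard-coded page is a placeholder):

	struct page *pages[PIPE_BUFFERS];
	struct partial_page partial[PIPE_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages,
		.partial	= partial,
		.flags		= flags,
		.ops		= &page_cache_pipe_buf_ops,
	};

	pages[0] = page;		/* a pinned pagecache page */
	partial[0].offset = loff;	/* data may start mid-page... */
	partial[0].len = this_len;	/* ...and stop mid-page too */
	spd.nr_pages = 1;

	return splice_to_pipe(pipe, &spd);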
41/* 48/*
@@ -50,7 +57,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
50 struct page *page = buf->page; 57 struct page *page = buf->page;
51 struct address_space *mapping = page_mapping(page); 58 struct address_space *mapping = page_mapping(page);
52 59
53 WARN_ON(!PageLocked(page)); 60 lock_page(page);
61
54 WARN_ON(!PageUptodate(page)); 62 WARN_ON(!PageUptodate(page));
55 63
56 /* 64 /*
@@ -65,10 +73,11 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
65 if (PagePrivate(page)) 73 if (PagePrivate(page))
66 try_to_release_page(page, mapping_gfp_mask(mapping)); 74 try_to_release_page(page, mapping_gfp_mask(mapping));
67 75
68 if (!remove_mapping(mapping, page)) 76 if (!remove_mapping(mapping, page)) {
77 unlock_page(page);
69 return 1; 78 return 1;
79 }
70 80
71 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
72 return 0; 81 return 0;
73} 82}
74 83
@@ -76,13 +85,10 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
76 struct pipe_buffer *buf) 85 struct pipe_buffer *buf)
77{ 86{
78 page_cache_release(buf->page); 87 page_cache_release(buf->page);
79 buf->page = NULL;
80 buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
81} 88}
82 89
83static void *page_cache_pipe_buf_map(struct file *file, 90static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
84 struct pipe_inode_info *info, 91 struct pipe_buffer *buf)
85 struct pipe_buffer *buf)
86{ 92{
87 struct page *page = buf->page; 93 struct page *page = buf->page;
88 int err; 94 int err;
@@ -108,51 +114,58 @@ static void *page_cache_pipe_buf_map(struct file *file,
108 } 114 }
109 115
110 /* 116 /*
111 * Page is ok after all, fall through to mapping. 117 * Page is ok after all, we are done.
112 */ 118 */
113 unlock_page(page); 119 unlock_page(page);
114 } 120 }
115 121
116 return kmap(page); 122 return 0;
117error: 123error:
118 unlock_page(page); 124 unlock_page(page);
119 return ERR_PTR(err); 125 return err;
120} 126}
121 127
122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 128static struct pipe_buf_operations page_cache_pipe_buf_ops = {
123 struct pipe_buffer *buf) 129 .can_merge = 0,
124{ 130 .map = generic_pipe_buf_map,
125 kunmap(buf->page); 131 .unmap = generic_pipe_buf_unmap,
126} 132 .pin = page_cache_pipe_buf_pin,
133 .release = page_cache_pipe_buf_release,
134 .steal = page_cache_pipe_buf_steal,
135 .get = generic_pipe_buf_get,
136};
127 137
128static void page_cache_pipe_buf_get(struct pipe_inode_info *info, 138static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
129 struct pipe_buffer *buf) 139 struct pipe_buffer *buf)
130{ 140{
131 page_cache_get(buf->page); 141 if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
142 return 1;
143
144 return generic_pipe_buf_steal(pipe, buf);
132} 145}
133 146
134static struct pipe_buf_operations page_cache_pipe_buf_ops = { 147static struct pipe_buf_operations user_page_pipe_buf_ops = {
135 .can_merge = 0, 148 .can_merge = 0,
136 .map = page_cache_pipe_buf_map, 149 .map = generic_pipe_buf_map,
137 .unmap = page_cache_pipe_buf_unmap, 150 .unmap = generic_pipe_buf_unmap,
151 .pin = generic_pipe_buf_pin,
138 .release = page_cache_pipe_buf_release, 152 .release = page_cache_pipe_buf_release,
139 .steal = page_cache_pipe_buf_steal, 153 .steal = user_page_pipe_buf_steal,
140 .get = page_cache_pipe_buf_get, 154 .get = generic_pipe_buf_get,
141}; 155};
142 156
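The steal() contract after this rework: return 0 with the page locked and refcounted once if the caller may take the page over, nonzero if the page stays shared and must be copied instead; user_page_pipe_buf_steal() additionally refuses pages that were not gifted. A consumer-side sketch (mirroring the SPLICE_F_MOVE path in pipe_to_file() below; the two helpers are hypothetical):

	if (buf->ops->steal(pipe, buf) == 0) {
		/* success: locked, page_count == 1, ours to re-insert */
		reuse_page(buf->page);
	} else {
		/* failure: page stays shared, fall back to a copy */
		copy_buf_contents(buf);
	}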
143/* 157/*
144 * Pipe output worker. This sets up our pipe format with the page cache 158 * Pipe output worker. This sets up our pipe format with the page cache
145 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 159 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
146 */ 160 */
147static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 161static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
148 int nr_pages, unsigned long offset, 162 struct splice_pipe_desc *spd)
149 unsigned long len, unsigned int flags)
150{ 163{
151 int ret, do_wakeup, i; 164 int ret, do_wakeup, page_nr;
152 165
153 ret = 0; 166 ret = 0;
154 do_wakeup = 0; 167 do_wakeup = 0;
155 i = 0; 168 page_nr = 0;
156 169
157 if (pipe->inode) 170 if (pipe->inode)
158 mutex_lock(&pipe->inode->i_mutex); 171 mutex_lock(&pipe->inode->i_mutex);
@@ -168,27 +181,22 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
168 if (pipe->nrbufs < PIPE_BUFFERS) { 181 if (pipe->nrbufs < PIPE_BUFFERS) {
169 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 182 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
170 struct pipe_buffer *buf = pipe->bufs + newbuf; 183 struct pipe_buffer *buf = pipe->bufs + newbuf;
171 struct page *page = pages[i++];
172 unsigned long this_len;
173 184
174 this_len = PAGE_CACHE_SIZE - offset; 185 buf->page = spd->pages[page_nr];
175 if (this_len > len) 186 buf->offset = spd->partial[page_nr].offset;
176 this_len = len; 187 buf->len = spd->partial[page_nr].len;
188 buf->ops = spd->ops;
189 if (spd->flags & SPLICE_F_GIFT)
190 buf->flags |= PIPE_BUF_FLAG_GIFT;
177 191
178 buf->page = page;
179 buf->offset = offset;
180 buf->len = this_len;
181 buf->ops = &page_cache_pipe_buf_ops;
182 pipe->nrbufs++; 192 pipe->nrbufs++;
193 page_nr++;
194 ret += buf->len;
195
183 if (pipe->inode) 196 if (pipe->inode)
184 do_wakeup = 1; 197 do_wakeup = 1;
185 198
186 ret += this_len; 199 if (!--spd->nr_pages)
187 len -= this_len;
188 offset = 0;
189 if (!--nr_pages)
190 break;
191 if (!len)
192 break; 200 break;
193 if (pipe->nrbufs < PIPE_BUFFERS) 201 if (pipe->nrbufs < PIPE_BUFFERS)
194 continue; 202 continue;
@@ -196,7 +204,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
196 break; 204 break;
197 } 205 }
198 206
199 if (flags & SPLICE_F_NONBLOCK) { 207 if (spd->flags & SPLICE_F_NONBLOCK) {
200 if (!ret) 208 if (!ret)
201 ret = -EAGAIN; 209 ret = -EAGAIN;
202 break; 210 break;
@@ -231,8 +239,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
231 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 239 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
232 } 240 }
233 241
234 while (i < nr_pages) 242 while (page_nr < spd->nr_pages)
235 page_cache_release(pages[i++]); 243 page_cache_release(spd->pages[page_nr++]);
236 244
237 return ret; 245 return ret;
238} 246}
@@ -243,15 +251,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
243 unsigned int flags) 251 unsigned int flags)
244{ 252{
245 struct address_space *mapping = in->f_mapping; 253 struct address_space *mapping = in->f_mapping;
246 unsigned int offset, nr_pages; 254 unsigned int loff, nr_pages;
247 struct page *pages[PIPE_BUFFERS]; 255 struct page *pages[PIPE_BUFFERS];
256 struct partial_page partial[PIPE_BUFFERS];
248 struct page *page; 257 struct page *page;
249 pgoff_t index; 258 pgoff_t index, end_index;
250 int i, error; 259 loff_t isize;
260 size_t total_len;
261 int error, page_nr;
262 struct splice_pipe_desc spd = {
263 .pages = pages,
264 .partial = partial,
265 .flags = flags,
266 .ops = &page_cache_pipe_buf_ops,
267 };
251 268
252 index = *ppos >> PAGE_CACHE_SHIFT; 269 index = *ppos >> PAGE_CACHE_SHIFT;
253 offset = *ppos & ~PAGE_CACHE_MASK; 270 loff = *ppos & ~PAGE_CACHE_MASK;
254 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 271 nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
255 272
256 if (nr_pages > PIPE_BUFFERS) 273 if (nr_pages > PIPE_BUFFERS)
257 nr_pages = PIPE_BUFFERS; 274 nr_pages = PIPE_BUFFERS;
@@ -261,49 +278,92 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
261 * read-ahead if this is a non-zero offset (we are likely doing small 278 * read-ahead if this is a non-zero offset (we are likely doing small
262 * chunk splice and the page is already there) for a single page. 279 * chunk splice and the page is already there) for a single page.
263 */ 280 */
264 if (!offset || nr_pages > 1) 281 if (!loff || nr_pages > 1)
265 do_page_cache_readahead(mapping, in, index, nr_pages); 282 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
266 283
267 /* 284 /*
268 * Now fill in the holes: 285 * Now fill in the holes:
269 */ 286 */
270 error = 0; 287 error = 0;
271 for (i = 0; i < nr_pages; i++, index++) { 288 total_len = 0;
272find_page: 289
290 /*
291 * Lookup the (hopefully) full range of pages we need.
292 */
293 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
294
295 /*
296 * If find_get_pages_contig() returned fewer pages than we needed,
297 * allocate the rest.
298 */
299 index += spd.nr_pages;
300 while (spd.nr_pages < nr_pages) {
273 /* 301 /*
274 * lookup the page for this index 302 * Page could be there, find_get_pages_contig() breaks on
303 * the first hole.
275 */ 304 */
276 page = find_get_page(mapping, index); 305 page = find_get_page(mapping, index);
277 if (!page) { 306 if (!page) {
278 /* 307 /*
279 * If in nonblock mode then dont block on 308 * Make sure the read-ahead engine is notified
280 * readpage (we've kicked readahead so there 309 * about this failure.
281 * will be asynchronous progress):
282 */ 310 */
283 if (flags & SPLICE_F_NONBLOCK) 311 handle_ra_miss(mapping, &in->f_ra, index);
284 break;
285 312
286 /* 313 /*
287 * page didn't exist, allocate one 314 * page didn't exist, allocate one.
288 */ 315 */
289 page = page_cache_alloc_cold(mapping); 316 page = page_cache_alloc_cold(mapping);
290 if (!page) 317 if (!page)
291 break; 318 break;
292 319
293 error = add_to_page_cache_lru(page, mapping, index, 320 error = add_to_page_cache_lru(page, mapping, index,
294 mapping_gfp_mask(mapping)); 321 mapping_gfp_mask(mapping));
295 if (unlikely(error)) { 322 if (unlikely(error)) {
296 page_cache_release(page); 323 page_cache_release(page);
297 break; 324 break;
298 } 325 }
299 326 /*
300 goto readpage; 327 * add_to_page_cache() locks the page, unlock it
328 * to avoid convoluting the logic below even more.
329 */
330 unlock_page(page);
301 } 331 }
302 332
333 pages[spd.nr_pages++] = page;
334 index++;
335 }
336
337 /*
338 * Now loop over the map and see if we need to start IO on any
339 * pages, fill in the partial map, etc.
340 */
341 index = *ppos >> PAGE_CACHE_SHIFT;
342 nr_pages = spd.nr_pages;
343 spd.nr_pages = 0;
344 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
345 unsigned int this_len;
346
347 if (!len)
348 break;
349
350 /*
351 * this_len is the max we'll use from this page
352 */
353 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
354 page = pages[page_nr];
355
303 /* 356 /*
304 * If the page isn't uptodate, we may need to start io on it 357 * If the page isn't uptodate, we may need to start io on it
305 */ 358 */
306 if (!PageUptodate(page)) { 359 if (!PageUptodate(page)) {
360 /*
361 * If in nonblock mode then don't block waiting
362 * for an in-flight io page
363 */
364 if (flags & SPLICE_F_NONBLOCK)
365 break;
366
307 lock_page(page); 367 lock_page(page);
308 368
309 /* 369 /*
@@ -313,7 +373,6 @@ find_page:
313 */ 373 */
314 if (!page->mapping) { 374 if (!page->mapping) {
315 unlock_page(page); 375 unlock_page(page);
316 page_cache_release(page);
317 break; 376 break;
318 } 377 }
319 /* 378 /*
@@ -324,25 +383,66 @@ find_page:
324 goto fill_it; 383 goto fill_it;
325 } 384 }
326 385
327readpage:
328 /* 386 /*
329 * need to read in the page 387 * need to read in the page
330 */ 388 */
331 error = mapping->a_ops->readpage(in, page); 389 error = mapping->a_ops->readpage(in, page);
332
333 if (unlikely(error)) { 390 if (unlikely(error)) {
334 page_cache_release(page); 391 /*
392 * We really should re-lookup the page here,
393 * but it complicates things a lot. Instead
394 * let's just use what we already stored, and
395 * we'll get it the next time we are called.
396 */
335 if (error == AOP_TRUNCATED_PAGE) 397 if (error == AOP_TRUNCATED_PAGE)
336 goto find_page; 398 error = 0;
399
337 break; 400 break;
338 } 401 }
402
403 /*
404 * i_size must be checked after ->readpage().
405 */
406 isize = i_size_read(mapping->host);
407 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
408 if (unlikely(!isize || index > end_index))
409 break;
410
411 /*
412 * if this is the last page, see if we need to shrink
413 * the length and stop
414 */
415 if (end_index == index) {
416 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
417 if (total_len + loff > isize)
418 break;
419 /*
420 * force quit after adding this page
421 */
422 len = this_len;
423 this_len = min(this_len, loff);
424 loff = 0;
425 }
339 } 426 }
340fill_it: 427fill_it:
341 pages[i] = page; 428 partial[page_nr].offset = loff;
429 partial[page_nr].len = this_len;
430 len -= this_len;
431 total_len += this_len;
432 loff = 0;
433 spd.nr_pages++;
434 index++;
342 } 435 }
343 436
344 if (i) 437 /*
345 return move_to_pipe(pipe, pages, i, offset, len, flags); 438 * Release any pages at the end, if we quit early. 'page_nr' is how far
439 * we got, 'nr_pages' is how many pages are in the map.
440 */
441 while (page_nr < nr_pages)
442 page_cache_release(pages[page_nr++]);
443
444 if (spd.nr_pages)
445 return splice_to_pipe(pipe, &spd);
346 446
347 return error; 447 return error;
348} 448}
@@ -369,17 +469,20 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
369 while (len) { 469 while (len) {
370 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 470 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
371 471
372 if (ret <= 0) 472 if (ret < 0)
373 break; 473 break;
474 else if (!ret) {
475 if (spliced)
476 break;
477 if (flags & SPLICE_F_NONBLOCK) {
478 ret = -EAGAIN;
479 break;
480 }
481 }
374 482
375 *ppos += ret; 483 *ppos += ret;
376 len -= ret; 484 len -= ret;
377 spliced += ret; 485 spliced += ret;
378
379 if (!(flags & SPLICE_F_NONBLOCK))
380 continue;
381 ret = -EAGAIN;
382 break;
383 } 486 }
384 487
385 if (spliced) 488 if (spliced)
@@ -392,38 +495,24 @@ EXPORT_SYMBOL(generic_file_splice_read);
392 495
393/* 496/*
394 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 497 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
395 * using sendpage(). 498 * using sendpage(). Return the number of bytes sent.
396 */ 499 */
397static int pipe_to_sendpage(struct pipe_inode_info *info, 500static int pipe_to_sendpage(struct pipe_inode_info *info,
398 struct pipe_buffer *buf, struct splice_desc *sd) 501 struct pipe_buffer *buf, struct splice_desc *sd)
399{ 502{
400 struct file *file = sd->file; 503 struct file *file = sd->file;
401 loff_t pos = sd->pos; 504 loff_t pos = sd->pos;
402 unsigned int offset; 505 int ret, more;
403 ssize_t ret;
404 void *ptr;
405 int more;
406
407 /*
408 * Sub-optimal, but we are limited by the pipe ->map. We don't
409 * need a kmap'ed buffer here, we just want to make sure we
410 * have the page pinned if the pipe page originates from the
411 * page cache.
412 */
413 ptr = buf->ops->map(file, info, buf);
414 if (IS_ERR(ptr))
415 return PTR_ERR(ptr);
416 506
417 offset = pos & ~PAGE_CACHE_MASK; 507 ret = buf->ops->pin(info, buf);
418 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 508 if (!ret) {
509 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
419 510
420 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 511 ret = file->f_op->sendpage(file, buf->page, buf->offset,
421 512 sd->len, &pos, more);
422 buf->ops->unmap(info, buf); 513 }
423 if (ret == sd->len)
424 return 0;
425 514
426 return -EIO; 515 return ret;
427} 516}
428 517
429/* 518/*
@@ -452,56 +541,88 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
452 struct file *file = sd->file; 541 struct file *file = sd->file;
453 struct address_space *mapping = file->f_mapping; 542 struct address_space *mapping = file->f_mapping;
454 gfp_t gfp_mask = mapping_gfp_mask(mapping); 543 gfp_t gfp_mask = mapping_gfp_mask(mapping);
455 unsigned int offset; 544 unsigned int offset, this_len;
456 struct page *page; 545 struct page *page;
457 pgoff_t index; 546 pgoff_t index;
458 char *src;
459 int ret; 547 int ret;
460 548
461 /* 549 /*
462 * make sure the data in this buffer is uptodate 550 * make sure the data in this buffer is uptodate
463 */ 551 */
464 src = buf->ops->map(file, info, buf); 552 ret = buf->ops->pin(info, buf);
465 if (IS_ERR(src)) 553 if (unlikely(ret))
466 return PTR_ERR(src); 554 return ret;
467 555
468 index = sd->pos >> PAGE_CACHE_SHIFT; 556 index = sd->pos >> PAGE_CACHE_SHIFT;
469 offset = sd->pos & ~PAGE_CACHE_MASK; 557 offset = sd->pos & ~PAGE_CACHE_MASK;
470 558
559 this_len = sd->len;
560 if (this_len + offset > PAGE_CACHE_SIZE)
561 this_len = PAGE_CACHE_SIZE - offset;
562
471 /* 563 /*
472 * Reuse buf page, if SPLICE_F_MOVE is set. 564 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
565 * page.
473 */ 566 */
474 if (sd->flags & SPLICE_F_MOVE) { 567 if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
475 /* 568 /*
476 * If steal succeeds, buf->page is now pruned from the vm 569 * If steal succeeds, buf->page is now pruned from the vm
477 * side (LRU and page cache) and we can reuse it. 570 * side (page cache) and we can reuse it. The page will also
571 * be locked on successful return.
478 */ 572 */
479 if (buf->ops->steal(info, buf)) 573 if (buf->ops->steal(info, buf))
480 goto find_page; 574 goto find_page;
481 575
576 page = buf->page;
577 page_cache_get(page);
578
482 /* 579 /*
483 * this will also set the page locked 580 * page must be on the LRU for adding to the pagecache.
581 * Check this without grabbing the zone lock, if it isn't
582 * then do grab the zone lock, recheck, and add if necessary.
484 */ 583 */
485 page = buf->page; 584 if (!PageLRU(page)) {
486 if (add_to_page_cache(page, mapping, index, gfp_mask)) 585 struct zone *zone = page_zone(page);
487 goto find_page; 586
587 spin_lock_irq(&zone->lru_lock);
588 if (!PageLRU(page)) {
589 SetPageLRU(page);
590 add_page_to_inactive_list(zone, page);
591 }
592 spin_unlock_irq(&zone->lru_lock);
593 }
488 594
489 if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 595 if (add_to_page_cache(page, mapping, index, gfp_mask)) {
490 lru_cache_add(page); 596 page_cache_release(page);
597 unlock_page(page);
598 goto find_page;
599 }
491 } else { 600 } else {
492find_page: 601find_page:
493 ret = -ENOMEM; 602 page = find_lock_page(mapping, index);
494 page = find_or_create_page(mapping, index, gfp_mask); 603 if (!page) {
495 if (!page) 604 ret = -ENOMEM;
496 goto out_nomem; 605 page = page_cache_alloc_cold(mapping);
606 if (unlikely(!page))
607 goto out_nomem;
608
609 /*
610 * This will also lock the page
611 */
612 ret = add_to_page_cache_lru(page, mapping, index,
613 gfp_mask);
614 if (unlikely(ret))
615 goto out;
616 }
497 617
498 /* 618 /*
499 * If the page is uptodate, it is also locked. If it isn't 619 * We get here with the page locked. If the page is also
500 * uptodate, we can mark it uptodate if we are filling the 620 * uptodate, we don't need to do more. If it isn't, we
501 * full page. Otherwise we need to read it in first... 621 * may need to bring it in if we are not going to overwrite
622 * the full page.
502 */ 623 */
503 if (!PageUptodate(page)) { 624 if (!PageUptodate(page)) {
504 if (sd->len < PAGE_CACHE_SIZE) { 625 if (this_len < PAGE_CACHE_SIZE) {
505 ret = mapping->a_ops->readpage(file, page); 626 ret = mapping->a_ops->readpage(file, page);
506 if (unlikely(ret)) 627 if (unlikely(ret))
507 goto out; 628 goto out;
@@ -520,58 +641,59 @@ find_page:
520 ret = -EIO; 641 ret = -EIO;
521 goto out; 642 goto out;
522 } 643 }
523 } else { 644 } else
524 WARN_ON(!PageLocked(page));
525 SetPageUptodate(page); 645 SetPageUptodate(page);
526 }
527 } 646 }
528 } 647 }
529 648
530 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 649 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
531 if (ret == AOP_TRUNCATED_PAGE) { 650 if (ret == AOP_TRUNCATED_PAGE) {
532 page_cache_release(page); 651 page_cache_release(page);
533 goto find_page; 652 goto find_page;
534 } else if (ret) 653 } else if (ret)
535 goto out; 654 goto out;
536 655
537 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 656 if (buf->page != page) {
538 char *dst = kmap_atomic(page, KM_USER0); 657 /*
658 * Careful, ->map() uses KM_USER0!
659 */
660 char *src = buf->ops->map(info, buf, 1);
661 char *dst = kmap_atomic(page, KM_USER1);
539 662
540 memcpy(dst + offset, src + buf->offset, sd->len); 663 memcpy(dst + offset, src + buf->offset, this_len);
541 flush_dcache_page(page); 664 flush_dcache_page(page);
542 kunmap_atomic(dst, KM_USER0); 665 kunmap_atomic(dst, KM_USER1);
666 buf->ops->unmap(info, buf, src);
543 } 667 }
544 668
545 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 669 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
546 if (ret == AOP_TRUNCATED_PAGE) { 670 if (!ret) {
671 /*
672 * Return the number of bytes written and mark page as
673 * accessed, we are now done!
674 */
675 ret = this_len;
676 mark_page_accessed(page);
677 balance_dirty_pages_ratelimited(mapping);
678 } else if (ret == AOP_TRUNCATED_PAGE) {
547 page_cache_release(page); 679 page_cache_release(page);
548 goto find_page; 680 goto find_page;
549 } else if (ret)
550 goto out;
551
552 mark_page_accessed(page);
553 balance_dirty_pages_ratelimited(mapping);
554out:
555 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
556 page_cache_release(page);
557 unlock_page(page);
558 } 681 }
682out:
683 page_cache_release(page);
684 unlock_page(page);
559out_nomem: 685out_nomem:
560 buf->ops->unmap(info, buf);
561 return ret; 686 return ret;
562} 687}
563 688
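One subtlety in the copy path above: ->map() is documented at the call site to occupy KM_USER0, so the destination page is mapped through KM_USER1. Each CPU owns exactly one fixmap slot per KM type, so two simultaneous atomic mappings need two distinct types. A generic sketch:

	/* two concurrent atomic mappings on one CPU need two slots */
	char *src = kmap_atomic(src_page, KM_USER0);
	char *dst = kmap_atomic(dst_page, KM_USER1);	/* KM_USER0 here would
							   recycle src's slot */

	memcpy(dst, src, PAGE_SIZE);

	kunmap_atomic(dst, KM_USER1);
	kunmap_atomic(src, KM_USER0);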
564typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
565 struct splice_desc *);
566
567/* 689/*
568 * Pipe input worker. Most of this logic works like a regular pipe, the 690 * Pipe input worker. Most of this logic works like a regular pipe, the
569 * key here is the 'actor' worker passed in that actually moves the data 691 * key here is the 'actor' worker passed in that actually moves the data
570 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 692 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
571 */ 693 */
572static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 694ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
573 loff_t *ppos, size_t len, unsigned int flags, 695 loff_t *ppos, size_t len, unsigned int flags,
574 splice_actor *actor) 696 splice_actor *actor)
575{ 697{
576 int ret, do_wakeup, err; 698 int ret, do_wakeup, err;
577 struct splice_desc sd; 699 struct splice_desc sd;
@@ -597,16 +719,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
597 sd.len = sd.total_len; 719 sd.len = sd.total_len;
598 720
599 err = actor(pipe, buf, &sd); 721 err = actor(pipe, buf, &sd);
600 if (err) { 722 if (err <= 0) {
601 if (!ret && err != -ENODATA) 723 if (!ret && err != -ENODATA)
602 ret = err; 724 ret = err;
603 725
604 break; 726 break;
605 } 727 }
606 728
607 ret += sd.len; 729 ret += err;
608 buf->offset += sd.len; 730 buf->offset += err;
609 buf->len -= sd.len; 731 buf->len -= err;
732
733 sd.len -= err;
734 sd.pos += err;
735 sd.total_len -= err;
736 if (sd.len)
737 continue;
610 738
611 if (!buf->len) { 739 if (!buf->len) {
612 buf->ops = NULL; 740 buf->ops = NULL;
@@ -617,8 +745,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
617 do_wakeup = 1; 745 do_wakeup = 1;
618 } 746 }
619 747
620 sd.pos += sd.len;
621 sd.total_len -= sd.len;
622 if (!sd.total_len) 748 if (!sd.total_len)
623 break; 749 break;
624 } 750 }
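splice_from_pipe() thus changes the actor contract: an actor returns the number of bytes it actually consumed, 0 or -ENODATA to stop quietly, or a negative errno, and the loop re-invokes it until the current segment is drained. A minimal conforming actor, purely for illustration (pipe_to_discard() is not part of this diff):

	static int pipe_to_discard(struct pipe_inode_info *pipe,
				   struct pipe_buffer *buf,
				   struct splice_desc *sd)
	{
		int error = buf->ops->pin(pipe, buf);

		if (unlikely(error))
			return error;	/* negative errno aborts the loop */

		return sd->len;		/* claim the whole chunk as written */
	}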
@@ -686,23 +812,27 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
686 struct address_space *mapping = out->f_mapping; 812 struct address_space *mapping = out->f_mapping;
687 ssize_t ret; 813 ssize_t ret;
688 814
689 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 815 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
690 816 if (ret > 0) {
691 /*
692 * If file or inode is SYNC and we actually wrote some data, sync it.
693 */
694 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
695 && ret > 0) {
696 struct inode *inode = mapping->host; 817 struct inode *inode = mapping->host;
697 int err;
698 818
699 mutex_lock(&inode->i_mutex); 819 *ppos += ret;
700 err = generic_osync_inode(mapping->host, mapping,
701 OSYNC_METADATA|OSYNC_DATA);
702 mutex_unlock(&inode->i_mutex);
703 820
704 if (err) 821 /*
705 ret = err; 822 * If file or inode is SYNC and we actually wrote some data,
823 * sync it.
824 */
825 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
826 int err;
827
828 mutex_lock(&inode->i_mutex);
829 err = generic_osync_inode(inode, mapping,
830 OSYNC_METADATA|OSYNC_DATA);
831 mutex_unlock(&inode->i_mutex);
832
833 if (err)
834 ret = err;
835 }
706 } 836 }
707 837
708 return ret; 838 return ret;
@@ -724,7 +854,7 @@ EXPORT_SYMBOL(generic_file_splice_write);
724ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 854ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
725 loff_t *ppos, size_t len, unsigned int flags) 855 loff_t *ppos, size_t len, unsigned int flags)
726{ 856{
727 return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 857 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
728} 858}
729 859
730EXPORT_SYMBOL(generic_splice_sendpage); 860EXPORT_SYMBOL(generic_splice_sendpage);
@@ -811,7 +941,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
811 941
812 /* 942 /*
813 * We don't have an immediate reader, but we'll read the stuff 943 * We don't have an immediate reader, but we'll read the stuff
814 * out of the pipe right after the move_to_pipe(). So set 944 * out of the pipe right after the splice_to_pipe(). So set
815 * PIPE_READERS appropriately. 945 * PIPE_READERS appropriately.
816 */ 946 */
817 pipe->readers = 1; 947 pipe->readers = 1;
@@ -904,6 +1034,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
904{ 1034{
905 struct pipe_inode_info *pipe; 1035 struct pipe_inode_info *pipe;
906 loff_t offset, *off; 1036 loff_t offset, *off;
1037 long ret;
907 1038
908 pipe = in->f_dentry->d_inode->i_pipe; 1039 pipe = in->f_dentry->d_inode->i_pipe;
909 if (pipe) { 1040 if (pipe) {
@@ -918,7 +1049,12 @@ static long do_splice(struct file *in, loff_t __user *off_in,
918 } else 1049 } else
919 off = &out->f_pos; 1050 off = &out->f_pos;
920 1051
921 return do_splice_from(pipe, out, off, len, flags); 1052 ret = do_splice_from(pipe, out, off, len, flags);
1053
1054 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
1055 ret = -EFAULT;
1056
1057 return ret;
922 } 1058 }
923 1059
924 pipe = out->f_dentry->d_inode->i_pipe; 1060 pipe = out->f_dentry->d_inode->i_pipe;
@@ -934,12 +1070,195 @@ static long do_splice(struct file *in, loff_t __user *off_in,
934 } else 1070 } else
935 off = &in->f_pos; 1071 off = &in->f_pos;
936 1072
937 return do_splice_to(in, off, pipe, len, flags); 1073 ret = do_splice_to(in, off, pipe, len, flags);
1074
1075 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
1076 ret = -EFAULT;
1077
1078 return ret;
938 } 1079 }
939 1080
940 return -EINVAL; 1081 return -EINVAL;
941} 1082}
942 1083
1084/*
1085 * Map an iov into an array of pages and offset/length tuples. With the
1086 * partial_page structure, we can map several non-contiguous ranges into
1087 * our one pages[] map instead of splitting that operation into pieces.
1088 * Could easily be exported as a generic helper for other users, in which
1089 * case one would probably want to add a 'max_nr_pages' parameter as well.
1090 */
1091static int get_iovec_page_array(const struct iovec __user *iov,
1092 unsigned int nr_vecs, struct page **pages,
1093 struct partial_page *partial, int aligned)
1094{
1095 int buffers = 0, error = 0;
1096
1097 /*
1098 * It's ok to take the mmap_sem for reading, even
1099 * across a "get_user()".
1100 */
1101 down_read(&current->mm->mmap_sem);
1102
1103 while (nr_vecs) {
1104 unsigned long off, npages;
1105 void __user *base;
1106 size_t len;
1107 int i;
1108
1109 /*
1110 * Get user address base and length for this iovec.
1111 */
1112 error = get_user(base, &iov->iov_base);
1113 if (unlikely(error))
1114 break;
1115 error = get_user(len, &iov->iov_len);
1116 if (unlikely(error))
1117 break;
1118
1119 /*
1120 * Sanity check this iovec. 0 read succeeds.
1121 */
1122 if (unlikely(!len))
1123 break;
1124 error = -EFAULT;
1125 if (unlikely(!base))
1126 break;
1127
1128 /*
1129 * Get this base offset and number of pages, then map
1130 * in the user pages.
1131 */
1132 off = (unsigned long) base & ~PAGE_MASK;
1133
1134 /*
1135 * If asked for alignment, the offset must be zero and the
1136 * length a multiple of the PAGE_SIZE.
1137 */
1138 error = -EINVAL;
1139 if (aligned && (off || len & ~PAGE_MASK))
1140 break;
1141
1142 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1143 if (npages > PIPE_BUFFERS - buffers)
1144 npages = PIPE_BUFFERS - buffers;
1145
1146 error = get_user_pages(current, current->mm,
1147 (unsigned long) base, npages, 0, 0,
1148 &pages[buffers], NULL);
1149
1150 if (unlikely(error <= 0))
1151 break;
1152
1153 /*
1154 * Fill this contiguous range into the partial page map.
1155 */
1156 for (i = 0; i < error; i++) {
1157 const int plen = min_t(size_t, len, PAGE_SIZE - off);
1158
1159 partial[buffers].offset = off;
1160 partial[buffers].len = plen;
1161
1162 off = 0;
1163 len -= plen;
1164 buffers++;
1165 }
1166
1167 /*
1168 * We didn't complete this iov, stop here since it probably
1169 * means we have to move some of this into a pipe to
1170 * be able to continue.
1171 */
1172 if (len)
1173 break;
1174
1175 /*
1176 * Don't continue if we mapped fewer pages than we asked for,
1177 * or if we mapped the max number of pages that we have
1178 * room for.
1179 */
1180 if (error < npages || buffers == PIPE_BUFFERS)
1181 break;
1182
1183 nr_vecs--;
1184 iov++;
1185 }
1186
1187 up_read(&current->mm->mmap_sem);
1188
1189 if (buffers)
1190 return buffers;
1191
1192 return error;
1193}
1194
1195/*
1196 * vmsplice splices a user address range into a pipe. It can be thought of
1197 * as splice-from-memory, where the regular splice is splice-from-file (or
1198 * to file). In both cases the output is a pipe, naturally.
1199 *
1200 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1201 * not the other way around. Splicing from user memory is a simple operation
1202 * that can be supported without any funky alignment restrictions or nasty
1203 * vm tricks. We simply map in the user memory and fill them into a pipe.
1204 * The reverse isn't quite as easy, though. There are two possible solutions
1205 * for that:
1206 *
1207 * - memcpy() the data internally, at which point we might as well just
1208 * do a regular read() on the buffer anyway.
1209 * - Lots of nasty vm tricks that are neither fast nor flexible (they
1210 * impose restrictions on both ends of the pipe).
1211 *
1212 * Alas, it isn't here.
1213 *
1214 */
1215static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1216 unsigned long nr_segs, unsigned int flags)
1217{
1218 struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
1219 struct page *pages[PIPE_BUFFERS];
1220 struct partial_page partial[PIPE_BUFFERS];
1221 struct splice_pipe_desc spd = {
1222 .pages = pages,
1223 .partial = partial,
1224 .flags = flags,
1225 .ops = &user_page_pipe_buf_ops,
1226 };
1227
1228 if (unlikely(!pipe))
1229 return -EBADF;
1230 if (unlikely(nr_segs > UIO_MAXIOV))
1231 return -EINVAL;
1232 else if (unlikely(!nr_segs))
1233 return 0;
1234
1235 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1236 flags & SPLICE_F_GIFT);
1237 if (spd.nr_pages <= 0)
1238 return spd.nr_pages;
1239
1240 return splice_to_pipe(pipe, &spd);
1241}
1242
1243asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1244 unsigned long nr_segs, unsigned int flags)
1245{
1246 struct file *file;
1247 long error;
1248 int fput;
1249
1250 error = -EBADF;
1251 file = fget_light(fd, &fput);
1252 if (file) {
1253 if (file->f_mode & FMODE_WRITE)
1254 error = do_vmsplice(file, iov, nr_segs, flags);
1255
1256 fput_light(file, fput);
1257 }
1258
1259 return error;
1260}
1261
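glibc of this era ships no wrapper for the new syscall, so user space reaches it through syscall(2). A minimal sketch; __NR_vmsplice is arch-specific, and the SPLICE_F_GIFT value is assumed from this era's pipe_fs_i.h:

	#include <sys/syscall.h>
	#include <sys/uio.h>
	#include <unistd.h>

	#ifndef SPLICE_F_GIFT
	#define SPLICE_F_GIFT	0x08	/* assumed value */
	#endif

	int main(void)
	{
		static char buf[4096] __attribute__((aligned(4096)));
		struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
		int pfd[2];

		if (pipe(pfd))
			return 1;
		/* gift the page to the pipe; a later SPLICE_F_MOVE splice
		 * may then steal it instead of copying */
		return syscall(__NR_vmsplice, pfd[1], &iov, 1,
			       SPLICE_F_GIFT) < 0;
	}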
943asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 1262asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
944 int fd_out, loff_t __user *off_out, 1263 int fd_out, loff_t __user *off_out,
945 size_t len, unsigned int flags) 1264 size_t len, unsigned int flags)
@@ -979,7 +1298,9 @@ static int link_pipe(struct pipe_inode_info *ipipe,
979 size_t len, unsigned int flags) 1298 size_t len, unsigned int flags)
980{ 1299{
981 struct pipe_buffer *ibuf, *obuf; 1300 struct pipe_buffer *ibuf, *obuf;
982 int ret = 0, do_wakeup = 0, i; 1301 int ret, do_wakeup, i, ipipe_first;
1302
1303 ret = do_wakeup = ipipe_first = 0;
983 1304
984 /* 1305 /*
985 * Potential ABBA deadlock, work around it by ordering lock 1306 * Potential ABBA deadlock, work around it by ordering lock
@@ -987,6 +1308,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
987 * could deadlock (one doing tee from A -> B, the other from B -> A). 1308 * could deadlock (one doing tee from A -> B, the other from B -> A).
988 */ 1309 */
989 if (ipipe->inode < opipe->inode) { 1310 if (ipipe->inode < opipe->inode) {
1311 ipipe_first = 1;
990 mutex_lock(&ipipe->inode->i_mutex); 1312 mutex_lock(&ipipe->inode->i_mutex);
991 mutex_lock(&opipe->inode->i_mutex); 1313 mutex_lock(&opipe->inode->i_mutex);
992 } else { 1314 } else {
@@ -1019,6 +1341,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1019 obuf = opipe->bufs + nbuf; 1341 obuf = opipe->bufs + nbuf;
1020 *obuf = *ibuf; 1342 *obuf = *ibuf;
1021 1343
1344 /*
1345 * Don't inherit the gift flag, we need to
1346 * prevent multiple steals of this page.
1347 */
1348 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1349
1022 if (obuf->len > len) 1350 if (obuf->len > len)
1023 obuf->len = len; 1351 obuf->len = len;
1024 1352
@@ -1035,9 +1363,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1035 1363
1036 /* 1364 /*
1037 * We have input available, but no output room. 1365 * We have input available, but no output room.
1038 * If we already copied data, return that. 1366 * If we already copied data, return that. If we
1367 * need to drop the opipe lock, it must be ordered
1368 * last to avoid deadlocks.
1039 */ 1369 */
1040 if (flags & SPLICE_F_NONBLOCK) { 1370 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1041 if (!ret) 1371 if (!ret)
1042 ret = -EAGAIN; 1372 ret = -EAGAIN;
1043 break; 1373 break;
@@ -1071,7 +1401,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1071 if (ret) 1401 if (ret)
1072 break; 1402 break;
1073 } 1403 }
1074 if (flags & SPLICE_F_NONBLOCK) { 1404 /*
1405 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
1406 * with another process, we can only safely do that if
1407 * the ipipe lock is ordered last.
1408 */
1409 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
1075 if (!ret) 1410 if (!ret)
1076 ret = -EAGAIN; 1411 ret = -EAGAIN;
1077 break; 1412 break;
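Taken together, the canonical consumer of this file is a zero-copy file-to-socket path: splice the file into a pipe, then splice the pipe into the socket. A trimmed user-space sketch (__NR_splice assumed available; a production version would also drain a partially emptied pipe before looping):

	#include <sys/syscall.h>
	#include <unistd.h>

	/* move up to 'len' bytes from file fd 'in' to socket fd 'out' */
	static long file_to_socket(int in, int out, size_t len)
	{
		long n, total = 0;
		int pfd[2];

		if (pipe(pfd))
			return -1;

		while (total < (long) len) {
			/* file -> pipe: pages are queued by reference */
			n = syscall(__NR_splice, in, NULL, pfd[1], NULL,
				    len - total, 0);
			if (n <= 0)
				break;
			/* pipe -> socket: consume what was just queued */
			n = syscall(__NR_splice, pfd[0], NULL, out, NULL,
				    n, 0);
			if (n <= 0)
				break;
			total += n;
		}

		close(pfd[0]);
		close(pfd[1]);
		return total;
	}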
diff --git a/fs/stat.c b/fs/stat.c
index 9948cc1685a4..0f282face322 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -261,7 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
261 return error; 261 return error;
262} 262}
263 263
264#ifndef __ARCH_WANT_STAT64 264#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
265asmlinkage long sys_newfstatat(int dfd, char __user *filename, 265asmlinkage long sys_newfstatat(int dfd, char __user *filename,
266 struct stat __user *statbuf, int flag) 266 struct stat __user *statbuf, int flag)
267{ 267{