about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/block_dev.c 2
-rw-r--r-- fs/cifs/CHANGES 6
-rw-r--r-- fs/cifs/README 8
-rw-r--r-- fs/cifs/cifsfs.c 99
-rw-r--r-- fs/cifs/cifssmb.c 2
-rw-r--r-- fs/cifs/connect.c 6
-rw-r--r-- fs/cifs/dir.c 18
-rw-r--r-- fs/cifs/fcntl.c 2
-rw-r--r-- fs/cifs/file.c 34
-rw-r--r-- fs/cifs/inode.c 6
-rw-r--r-- fs/cifs/link.c 6
-rw-r--r-- fs/cifs/ntlmssp.c 14
-rw-r--r-- fs/cifs/readdir.c 45
-rw-r--r-- fs/cifs/xattr.c 8
-rw-r--r-- fs/compat.c 24
-rw-r--r-- fs/ext3/inode.c 13
-rw-r--r-- fs/ext3/ioctl.c 18
-rw-r--r-- fs/ext3/resize.c 2
-rw-r--r-- fs/fuse/dev.c 35
-rw-r--r-- fs/fuse/fuse_i.h 12
-rw-r--r-- fs/fuse/inode.c 40
-rw-r--r-- fs/locks.c 21
-rw-r--r-- fs/pipe.c 190
-rw-r--r-- fs/reiserfs/xattr_acl.c 5
-rw-r--r-- fs/splice.c 578
-rw-r--r-- fs/stat.c 2
-rw-r--r-- fs/xfs/xfs_alloc.c 5
-rw-r--r-- fs/xfs/xfs_rename.c 12
-rw-r--r-- fs/xfs/xfs_vfsops.c 27
-rw-r--r-- fs/xfs/xfs_vnodeops.c 2
30 files changed, 850 insertions, 392 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index af88c43043d5..f5958f413bd1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = {
1104 .readv = generic_file_readv, 1104 .readv = generic_file_readv,
1105 .writev = generic_file_write_nolock, 1105 .writev = generic_file_write_nolock,
1106 .sendfile = generic_file_sendfile, 1106 .sendfile = generic_file_sendfile,
1107 .splice_read = generic_file_splice_read,
1108 .splice_write = generic_file_splice_write,
1107}; 1109};
1108 1110
1109int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1111int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8a2de038882e..1a27ecb46c9a 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,7 +1,11 @@
1Version 1.42 1Version 1.42
2------------ 2------------
3Fix slow oplock break when mounted to different servers at the same time and 3Fix slow oplock break when mounted to different servers at the same time and
4the tids match and we try to find matching fid on wrong server. 4the tids match and we try to find matching fid on wrong server. Fix read
5looping when signing required by server (2.6.16 kernel only). Fix readdir
6vs. rename race which could cause each to hang. Return . and .. even
7if server does not. Allow searches to skip first three entries and
8begin at any location. Fix oops in find_writeable_file.
5 9
6Version 1.41 10Version 1.41
7------------ 11------------
diff --git a/fs/cifs/README b/fs/cifs/README
index b2b4d0803761..0355003f4f0a 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -511,6 +511,14 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
511 support and want to map the uid and gid fields 511 support and want to map the uid and gid fields
512 to values supplied at mount (rather than the 512 to values supplied at mount (rather than the
513 actual values, then set this to zero. (default 1) 513 actual values, then set this to zero. (default 1)
514Experimental When set to 1, enables certain experimental
515 features. Currently this enables multipage writes
516 when signing is enabled (the multipage write
517 performance enhancement was disabled when
518 signing was turned on, in case the buffer was
519 modified just before it was sent). This flag will
520 also be used to select the new experimental
521 sessionsetup code.
514 522
515These experimental features and tracing can be enabled by changing flags in 523These experimental features and tracing can be enabled by changing flags in
516/proc/fs/cifs (after the cifs module has been installed or built into the 524/proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d4b713e5affb..c262d8874ce9 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -33,6 +33,7 @@
33#include <linux/vfs.h> 33#include <linux/vfs.h>
34#include <linux/mempool.h> 34#include <linux/mempool.h>
35#include <linux/delay.h> 35#include <linux/delay.h>
36#include <linux/kthread.h>
36#include "cifsfs.h" 37#include "cifsfs.h"
37#include "cifspdu.h" 38#include "cifspdu.h"
38#define DECLARE_GLOBALS_HERE 39#define DECLARE_GLOBALS_HERE
@@ -75,9 +76,6 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ;
75module_param(cifs_max_pending, int, 0); 76module_param(cifs_max_pending, int, 0);
76MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256"); 77MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256");
77 78
78static DECLARE_COMPLETION(cifs_oplock_exited);
79static DECLARE_COMPLETION(cifs_dnotify_exited);
80
81extern mempool_t *cifs_sm_req_poolp; 79extern mempool_t *cifs_sm_req_poolp;
82extern mempool_t *cifs_req_poolp; 80extern mempool_t *cifs_req_poolp;
83extern mempool_t *cifs_mid_poolp; 81extern mempool_t *cifs_mid_poolp;
@@ -841,10 +839,6 @@ static int cifs_oplock_thread(void * dummyarg)
841 __u16 netfid; 839 __u16 netfid;
842 int rc; 840 int rc;
843 841
844 daemonize("cifsoplockd");
845 allow_signal(SIGTERM);
846
847 oplockThread = current;
848 do { 842 do {
849 if (try_to_freeze()) 843 if (try_to_freeze())
850 continue; 844 continue;
@@ -900,9 +894,9 @@ static int cifs_oplock_thread(void * dummyarg)
900 set_current_state(TASK_INTERRUPTIBLE); 894 set_current_state(TASK_INTERRUPTIBLE);
901 schedule_timeout(1); /* yield in case q were corrupt */ 895 schedule_timeout(1); /* yield in case q were corrupt */
902 } 896 }
903 } while(!signal_pending(current)); 897 } while (!kthread_should_stop());
904 oplockThread = NULL; 898
905 complete_and_exit (&cifs_oplock_exited, 0); 899 return 0;
906} 900}
907 901
908static int cifs_dnotify_thread(void * dummyarg) 902static int cifs_dnotify_thread(void * dummyarg)
@@ -910,10 +904,6 @@ static int cifs_dnotify_thread(void * dummyarg)
910 struct list_head *tmp; 904 struct list_head *tmp;
911 struct cifsSesInfo *ses; 905 struct cifsSesInfo *ses;
912 906
913 daemonize("cifsdnotifyd");
914 allow_signal(SIGTERM);
915
916 dnotifyThread = current;
917 do { 907 do {
918 if(try_to_freeze()) 908 if(try_to_freeze())
919 continue; 909 continue;
@@ -931,8 +921,9 @@ static int cifs_dnotify_thread(void * dummyarg)
931 wake_up_all(&ses->server->response_q); 921 wake_up_all(&ses->server->response_q);
932 } 922 }
933 read_unlock(&GlobalSMBSeslock); 923 read_unlock(&GlobalSMBSeslock);
934 } while(!signal_pending(current)); 924 } while (!kthread_should_stop());
935 complete_and_exit (&cifs_dnotify_exited, 0); 925
926 return 0;
936} 927}
937 928
938static int __init 929static int __init
@@ -982,32 +973,48 @@ init_cifs(void)
982 } 973 }
983 974
984 rc = cifs_init_inodecache(); 975 rc = cifs_init_inodecache();
985 if (!rc) { 976 if (rc)
986 rc = cifs_init_mids(); 977 goto out_clean_proc;
987 if (!rc) { 978
988 rc = cifs_init_request_bufs(); 979 rc = cifs_init_mids();
989 if (!rc) { 980 if (rc)
990 rc = register_filesystem(&cifs_fs_type); 981 goto out_destroy_inodecache;
991 if (!rc) { 982
992 rc = (int)kernel_thread(cifs_oplock_thread, NULL, 983 rc = cifs_init_request_bufs();
993 CLONE_FS | CLONE_FILES | CLONE_VM); 984 if (rc)
994 if(rc > 0) { 985 goto out_destroy_mids;
995 rc = (int)kernel_thread(cifs_dnotify_thread, NULL, 986
996 CLONE_FS | CLONE_FILES | CLONE_VM); 987 rc = register_filesystem(&cifs_fs_type);
997 if(rc > 0) 988 if (rc)
998 return 0; 989 goto out_destroy_request_bufs;
999 else 990
1000 cERROR(1,("error %d create dnotify thread", rc)); 991 oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd");
1001 } else { 992 if (IS_ERR(oplockThread)) {
1002 cERROR(1,("error %d create oplock thread",rc)); 993 rc = PTR_ERR(oplockThread);
1003 } 994 cERROR(1,("error %d create oplock thread", rc));
1004 } 995 goto out_unregister_filesystem;
1005 cifs_destroy_request_bufs();
1006 }
1007 cifs_destroy_mids();
1008 }
1009 cifs_destroy_inodecache();
1010 } 996 }
997
998 dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
999 if (IS_ERR(dnotifyThread)) {
1000 rc = PTR_ERR(dnotifyThread);
1001 cERROR(1,("error %d create dnotify thread", rc));
1002 goto out_stop_oplock_thread;
1003 }
1004
1005 return 0;
1006
1007 out_stop_oplock_thread:
1008 kthread_stop(oplockThread);
1009 out_unregister_filesystem:
1010 unregister_filesystem(&cifs_fs_type);
1011 out_destroy_request_bufs:
1012 cifs_destroy_request_bufs();
1013 out_destroy_mids:
1014 cifs_destroy_mids();
1015 out_destroy_inodecache:
1016 cifs_destroy_inodecache();
1017 out_clean_proc:
1011#ifdef CONFIG_PROC_FS 1018#ifdef CONFIG_PROC_FS
1012 cifs_proc_clean(); 1019 cifs_proc_clean();
1013#endif 1020#endif
@@ -1025,14 +1032,8 @@ exit_cifs(void)
1025 cifs_destroy_inodecache(); 1032 cifs_destroy_inodecache();
1026 cifs_destroy_mids(); 1033 cifs_destroy_mids();
1027 cifs_destroy_request_bufs(); 1034 cifs_destroy_request_bufs();
1028 if(oplockThread) { 1035 kthread_stop(oplockThread);
1029 send_sig(SIGTERM, oplockThread, 1); 1036 kthread_stop(dnotifyThread);
1030 wait_for_completion(&cifs_oplock_exited);
1031 }
1032 if(dnotifyThread) {
1033 send_sig(SIGTERM, dnotifyThread, 1);
1034 wait_for_completion(&cifs_dnotify_exited);
1035 }
1036} 1037}
1037 1038
1038MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); 1039MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d705500aa283..fd36892eda55 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3119,7 +3119,7 @@ findFirstRetry:
3119 psrch_inf->endOfSearch = FALSE; 3119 psrch_inf->endOfSearch = FALSE;
3120 3120
3121 psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); 3121 psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount);
3122 psrch_inf->index_of_last_entry = 3122 psrch_inf->index_of_last_entry = 2 /* skip . and .. */ +
3123 psrch_inf->entries_in_buffer; 3123 psrch_inf->entries_in_buffer;
3124 *pnetfid = parms->SearchHandle; 3124 *pnetfid = parms->SearchHandle;
3125 } else { 3125 } else {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0b86d5ca9014..d2ec806a4f32 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3447,6 +3447,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3447 pSesInfo->server->secMode, 3447 pSesInfo->server->secMode,
3448 pSesInfo->server->capabilities, 3448 pSesInfo->server->capabilities,
3449 pSesInfo->server->timeZone)); 3449 pSesInfo->server->timeZone));
3450#ifdef CONFIG_CIFS_EXPERIMENTAL
3451 if(experimEnabled > 1)
3452 rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */,
3453 &ntlmv2_flag, nls_info);
3454 else
3455#endif
3450 if (extended_security 3456 if (extended_security
3451 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3457 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3452 && (pSesInfo->server->secType == NTLMSSP)) { 3458 && (pSesInfo->server->secType == NTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1d0ca3eaaca5..82315edc77d7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -139,9 +139,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
139 cifs_sb = CIFS_SB(inode->i_sb); 139 cifs_sb = CIFS_SB(inode->i_sb);
140 pTcon = cifs_sb->tcon; 140 pTcon = cifs_sb->tcon;
141 141
142 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
143 full_path = build_path_from_dentry(direntry); 142 full_path = build_path_from_dentry(direntry);
144 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
145 if(full_path == NULL) { 143 if(full_path == NULL) {
146 FreeXid(xid); 144 FreeXid(xid);
147 return -ENOMEM; 145 return -ENOMEM;
@@ -316,9 +314,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
316 cifs_sb = CIFS_SB(inode->i_sb); 314 cifs_sb = CIFS_SB(inode->i_sb);
317 pTcon = cifs_sb->tcon; 315 pTcon = cifs_sb->tcon;
318 316
319 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
320 full_path = build_path_from_dentry(direntry); 317 full_path = build_path_from_dentry(direntry);
321 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
322 if(full_path == NULL) 318 if(full_path == NULL)
323 rc = -ENOMEM; 319 rc = -ENOMEM;
324 else if (pTcon->ses->capabilities & CAP_UNIX) { 320 else if (pTcon->ses->capabilities & CAP_UNIX) {
@@ -440,6 +436,20 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
440 cifs_sb = CIFS_SB(parent_dir_inode->i_sb); 436 cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
441 pTcon = cifs_sb->tcon; 437 pTcon = cifs_sb->tcon;
442 438
439 /*
440 * Don't allow the separator character in a path component.
441 * The VFS will not allow "/", but "\" is allowed by posix.
442 */
443 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
444 int i;
445 for (i = 0; i < direntry->d_name.len; i++)
446 if (direntry->d_name.name[i] == '\\') {
447 cFYI(1, ("Invalid file name"));
448 FreeXid(xid);
449 return ERR_PTR(-EINVAL);
450 }
451 }
452
443 /* can not grab the rename sem here since it would 453 /* can not grab the rename sem here since it would
444 deadlock in the cases (beginning of sys_rename itself) 454 deadlock in the cases (beginning of sys_rename itself)
445 in which we already have the sb rename sem */ 455 in which we already have the sb rename sem */
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index ec4dfe9bf5ef..633a93811328 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -86,9 +86,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
86 cifs_sb = CIFS_SB(file->f_dentry->d_sb); 86 cifs_sb = CIFS_SB(file->f_dentry->d_sb);
87 pTcon = cifs_sb->tcon; 87 pTcon = cifs_sb->tcon;
88 88
89 mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
90 full_path = build_path_from_dentry(file->f_dentry); 89 full_path = build_path_from_dentry(file->f_dentry);
91 mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
92 90
93 if(full_path == NULL) { 91 if(full_path == NULL) {
94 rc = -ENOMEM; 92 rc = -ENOMEM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5c497c529772..e152bf6afa60 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -203,9 +203,7 @@ int cifs_open(struct inode *inode, struct file *file)
203 } 203 }
204 } 204 }
205 205
206 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
207 full_path = build_path_from_dentry(file->f_dentry); 206 full_path = build_path_from_dentry(file->f_dentry);
208 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
209 if (full_path == NULL) { 207 if (full_path == NULL) {
210 FreeXid(xid); 208 FreeXid(xid);
211 return -ENOMEM; 209 return -ENOMEM;
@@ -906,8 +904,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
906 if (rc != 0) 904 if (rc != 0)
907 break; 905 break;
908 } 906 }
909 /* BB FIXME We can not sign across two buffers yet */ 907 if(experimEnabled || (pTcon->ses->server->secMode &
910 if((pTcon->ses->server->secMode &
911 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) { 908 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) {
912 struct kvec iov[2]; 909 struct kvec iov[2];
913 unsigned int len; 910 unsigned int len;
@@ -923,13 +920,13 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
923 *poffset, &bytes_written, 920 *poffset, &bytes_written,
924 iov, 1, long_op); 921 iov, 1, long_op);
925 } else 922 } else
926 /* BB FIXME fixup indentation of line below */ 923 rc = CIFSSMBWrite(xid, pTcon,
927 rc = CIFSSMBWrite(xid, pTcon, 924 open_file->netfid,
928 open_file->netfid, 925 min_t(const int, cifs_sb->wsize,
929 min_t(const int, cifs_sb->wsize, 926 write_size - total_written),
930 write_size - total_written), 927 *poffset, &bytes_written,
931 *poffset, &bytes_written, 928 write_data + total_written,
932 write_data + total_written, NULL, long_op); 929 NULL, long_op);
933 } 930 }
934 if (rc || (bytes_written == 0)) { 931 if (rc || (bytes_written == 0)) {
935 if (total_written) 932 if (total_written)
@@ -968,6 +965,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
968 struct cifsFileInfo *open_file; 965 struct cifsFileInfo *open_file;
969 int rc; 966 int rc;
970 967
968 /* Having a null inode here (because mapping->host was set to zero by
969 the VFS or MM) should not happen but we had reports of an oops (due to
970 it being zero) during stress testcases so we need to check for it */
971
972 if(cifs_inode == NULL) {
973 cERROR(1,("Null inode passed to cifs_writeable_file"));
974 dump_stack();
975 return NULL;
976 }
977
971 read_lock(&GlobalSMBSeslock); 978 read_lock(&GlobalSMBSeslock);
972 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 979 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
973 if (open_file->closePend) 980 if (open_file->closePend)
@@ -1093,12 +1100,11 @@ static int cifs_writepages(struct address_space *mapping,
1093 if (cifs_sb->wsize < PAGE_CACHE_SIZE) 1100 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1094 return generic_writepages(mapping, wbc); 1101 return generic_writepages(mapping, wbc);
1095 1102
1096 /* BB FIXME we do not have code to sign across multiple buffers yet,
1097 so go to older writepage style write which we can sign if needed */
1098 if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) 1103 if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1099 if(cifs_sb->tcon->ses->server->secMode & 1104 if(cifs_sb->tcon->ses->server->secMode &
1100 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 1105 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1101 return generic_writepages(mapping, wbc); 1106 if(!experimEnabled)
1107 return generic_writepages(mapping, wbc);
1102 1108
1103 /* 1109 /*
1104 * BB: Is this meaningful for a non-block-device file system? 1110 * BB: Is this meaningful for a non-block-device file system?
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 957ddd1571c6..4093764ef461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -722,9 +722,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
722 cifs_sb = CIFS_SB(inode->i_sb); 722 cifs_sb = CIFS_SB(inode->i_sb);
723 pTcon = cifs_sb->tcon; 723 pTcon = cifs_sb->tcon;
724 724
725 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
726 full_path = build_path_from_dentry(direntry); 725 full_path = build_path_from_dentry(direntry);
727 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
728 if (full_path == NULL) { 726 if (full_path == NULL) {
729 FreeXid(xid); 727 FreeXid(xid);
730 return -ENOMEM; 728 return -ENOMEM;
@@ -807,9 +805,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
807 cifs_sb = CIFS_SB(inode->i_sb); 805 cifs_sb = CIFS_SB(inode->i_sb);
808 pTcon = cifs_sb->tcon; 806 pTcon = cifs_sb->tcon;
809 807
810 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
811 full_path = build_path_from_dentry(direntry); 808 full_path = build_path_from_dentry(direntry);
812 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
813 if (full_path == NULL) { 809 if (full_path == NULL) {
814 FreeXid(xid); 810 FreeXid(xid);
815 return -ENOMEM; 811 return -ENOMEM;
@@ -1141,9 +1137,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1141 rc = 0; 1137 rc = 0;
1142 } 1138 }
1143 1139
1144 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
1145 full_path = build_path_from_dentry(direntry); 1140 full_path = build_path_from_dentry(direntry);
1146 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
1147 if (full_path == NULL) { 1141 if (full_path == NULL) {
1148 FreeXid(xid); 1142 FreeXid(xid);
1149 return -ENOMEM; 1143 return -ENOMEM;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 9562f5bba65c..2ec99f833142 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -48,10 +48,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
48/* No need to check for cross device links since server will do that 48/* No need to check for cross device links since server will do that
49 BB note DFS case in future though (when we may have to check) */ 49 BB note DFS case in future though (when we may have to check) */
50 50
51 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
52 fromName = build_path_from_dentry(old_file); 51 fromName = build_path_from_dentry(old_file);
53 toName = build_path_from_dentry(direntry); 52 toName = build_path_from_dentry(direntry);
54 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
55 if((fromName == NULL) || (toName == NULL)) { 53 if((fromName == NULL) || (toName == NULL)) {
56 rc = -ENOMEM; 54 rc = -ENOMEM;
57 goto cifs_hl_exit; 55 goto cifs_hl_exit;
@@ -103,9 +101,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
103 101
104 xid = GetXid(); 102 xid = GetXid();
105 103
106 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
107 full_path = build_path_from_dentry(direntry); 104 full_path = build_path_from_dentry(direntry);
108 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
109 105
110 if (!full_path) 106 if (!full_path)
111 goto out_no_free; 107 goto out_no_free;
@@ -164,9 +160,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
164 cifs_sb = CIFS_SB(inode->i_sb); 160 cifs_sb = CIFS_SB(inode->i_sb);
165 pTcon = cifs_sb->tcon; 161 pTcon = cifs_sb->tcon;
166 162
167 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
168 full_path = build_path_from_dentry(direntry); 163 full_path = build_path_from_dentry(direntry);
169 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
170 164
171 if(full_path == NULL) { 165 if(full_path == NULL) {
172 FreeXid(xid); 166 FreeXid(xid);
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
index 78866f925747..115359cc7a32 100644
--- a/fs/cifs/ntlmssp.c
+++ b/fs/cifs/ntlmssp.c
@@ -121,6 +121,20 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
121 } 121 }
122 122
123 123
124 /* copy session key */
125
126 /* if Unicode, align strings to two byte boundary */
127
128 /* copy user name */ /* BB Do we need to special case null user name? */
129
130 /* copy domain name */
131
132 /* copy Linux version */
133
134 /* copy network operating system name */
135
136 /* update bcc and smb buffer length */
137
124/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */ 138/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
125 /* SMB request buf freed in SendReceive2 */ 139 /* SMB request buf freed in SendReceive2 */
126 140
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2f6e2825571e..b689c5035124 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -404,9 +404,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
404 if(pTcon == NULL) 404 if(pTcon == NULL)
405 return -EINVAL; 405 return -EINVAL;
406 406
407 mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
408 full_path = build_path_from_dentry(file->f_dentry); 407 full_path = build_path_from_dentry(file->f_dentry);
409 mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
410 408
411 if(full_path == NULL) { 409 if(full_path == NULL) {
412 return -ENOMEM; 410 return -ENOMEM;
@@ -592,6 +590,13 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
592 first_entry_in_buffer = 590 first_entry_in_buffer =
593 cifsFile->srch_inf.index_of_last_entry - 591 cifsFile->srch_inf.index_of_last_entry -
594 cifsFile->srch_inf.entries_in_buffer; 592 cifsFile->srch_inf.entries_in_buffer;
593
594 /* if the first entry in the buffer is at index zero then this is
595 the first buffer of the search response data, which means . and ..
596 are likely to be in this buffer; however some servers do not
597 return . and .. for the root of a drive and for those we need
598 to start two entries earlier */
599
595/* dump_cifs_file_struct(file, "In fce ");*/ 600/* dump_cifs_file_struct(file, "In fce ");*/
596 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 601 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
597 is_dir_changed(file)) || 602 is_dir_changed(file)) ||
@@ -634,23 +639,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
634 char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + 639 char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start +
635 smbCalcSize((struct smb_hdr *) 640 smbCalcSize((struct smb_hdr *)
636 cifsFile->srch_inf.ntwrk_buf_start); 641 cifsFile->srch_inf.ntwrk_buf_start);
642
643 current_entry = cifsFile->srch_inf.srch_entries_start;
637 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 644 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
638 - cifsFile->srch_inf.entries_in_buffer; 645 - cifsFile->srch_inf.entries_in_buffer;
639 pos_in_buf = index_to_find - first_entry_in_buffer; 646 pos_in_buf = index_to_find - first_entry_in_buffer;
640 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); 647 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
641 current_entry = cifsFile->srch_inf.srch_entries_start;
642 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) { 648 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
643 /* go entry by entry figuring out which is first */ 649 /* go entry by entry figuring out which is first */
644 /* if( . or ..)
645 skip */
646 rc = cifs_entry_is_dot(current_entry,cifsFile);
647 if(rc == 1) /* is . or .. so skip */ {
648 cFYI(1,("Entry is .")); /* BB removeme BB */
649 /* continue; */
650 } else if (rc == 2 ) {
651 cFYI(1,("Entry is ..")); /* BB removeme BB */
652 /* continue; */
653 }
654 current_entry = nxt_dir_entry(current_entry,end_of_smb); 650 current_entry = nxt_dir_entry(current_entry,end_of_smb);
655 } 651 }
656 if((current_entry == NULL) && (i < pos_in_buf)) { 652 if((current_entry == NULL) && (i < pos_in_buf)) {
@@ -770,6 +766,11 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
770 if(file->f_dentry == NULL) 766 if(file->f_dentry == NULL)
771 return -ENOENT; 767 return -ENOENT;
772 768
769 rc = cifs_entry_is_dot(pfindEntry,pCifsF);
770 /* skip . and .. since we added them first */
771 if(rc != 0)
772 return 0;
773
773 cifs_sb = CIFS_SB(file->f_dentry->d_sb); 774 cifs_sb = CIFS_SB(file->f_dentry->d_sb);
774 775
775 qstring.name = scratch_buf; 776 qstring.name = scratch_buf;
@@ -898,22 +899,22 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
898 899
899 switch ((int) file->f_pos) { 900 switch ((int) file->f_pos) {
900 case 0: 901 case 0:
901 /*if (filldir(direntry, ".", 1, file->f_pos, 902 if (filldir(direntry, ".", 1, file->f_pos,
902 file->f_dentry->d_inode->i_ino, DT_DIR) < 0) { 903 file->f_dentry->d_inode->i_ino, DT_DIR) < 0) {
903 cERROR(1, ("Filldir for current dir failed ")); 904 cERROR(1, ("Filldir for current dir failed"));
904 rc = -ENOMEM; 905 rc = -ENOMEM;
905 break; 906 break;
906 } 907 }
907 file->f_pos++; */ 908 file->f_pos++;
908 case 1: 909 case 1:
909 /* if (filldir(direntry, "..", 2, file->f_pos, 910 if (filldir(direntry, "..", 2, file->f_pos,
910 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 911 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
911 cERROR(1, ("Filldir for parent dir failed ")); 912 cERROR(1, ("Filldir for parent dir failed "));
912 rc = -ENOMEM; 913 rc = -ENOMEM;
913 break; 914 break;
914 } 915 }
915 file->f_pos++; */ 916 file->f_pos++;
916 case 2: 917 default:
917 /* 1) If search is active, 918 /* 1) If search is active,
918 is in current search buffer? 919 is in current search buffer?
919 if it before then restart search 920 if it before then restart search
@@ -927,7 +928,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
927 return rc; 928 return rc;
928 } 929 }
929 } 930 }
930 default:
931 if(file->private_data == NULL) { 931 if(file->private_data == NULL) {
932 rc = -EINVAL; 932 rc = -EINVAL;
933 FreeXid(xid); 933 FreeXid(xid);
@@ -947,8 +947,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
947 kfree(cifsFile->search_resume_name); 947 kfree(cifsFile->search_resume_name);
948 cifsFile->search_resume_name = NULL; */ 948 cifsFile->search_resume_name = NULL; */
949 949
950 /* BB account for . and .. in f_pos as special case */
951
952 rc = find_cifs_entry(xid,pTcon, file, 950 rc = find_cifs_entry(xid,pTcon, file,
953 &current_entry,&num_to_fill); 951 &current_entry,&num_to_fill);
954 if(rc) { 952 if(rc) {
@@ -977,7 +975,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
977 num_to_fill, i)); 975 num_to_fill, i));
978 break; 976 break;
979 } 977 }
980 978 /* if buggy server returns . and .. late do
979 we want to check for that here? */
981 rc = cifs_filldir(current_entry, file, 980 rc = cifs_filldir(current_entry, file,
982 filldir, direntry,tmp_buf); 981 filldir, direntry,tmp_buf);
983 file->f_pos++; 982 file->f_pos++;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3938444d87b2..7754d641775e 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -62,9 +62,7 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name)
62 cifs_sb = CIFS_SB(sb); 62 cifs_sb = CIFS_SB(sb);
63 pTcon = cifs_sb->tcon; 63 pTcon = cifs_sb->tcon;
64 64
65 mutex_lock(&sb->s_vfs_rename_mutex);
66 full_path = build_path_from_dentry(direntry); 65 full_path = build_path_from_dentry(direntry);
67 mutex_unlock(&sb->s_vfs_rename_mutex);
68 if(full_path == NULL) { 66 if(full_path == NULL) {
69 FreeXid(xid); 67 FreeXid(xid);
70 return -ENOMEM; 68 return -ENOMEM;
@@ -116,9 +114,7 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name,
116 cifs_sb = CIFS_SB(sb); 114 cifs_sb = CIFS_SB(sb);
117 pTcon = cifs_sb->tcon; 115 pTcon = cifs_sb->tcon;
118 116
119 mutex_lock(&sb->s_vfs_rename_mutex);
120 full_path = build_path_from_dentry(direntry); 117 full_path = build_path_from_dentry(direntry);
121 mutex_unlock(&sb->s_vfs_rename_mutex);
122 if(full_path == NULL) { 118 if(full_path == NULL) {
123 FreeXid(xid); 119 FreeXid(xid);
124 return -ENOMEM; 120 return -ENOMEM;
@@ -223,9 +219,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
223 cifs_sb = CIFS_SB(sb); 219 cifs_sb = CIFS_SB(sb);
224 pTcon = cifs_sb->tcon; 220 pTcon = cifs_sb->tcon;
225 221
226 mutex_lock(&sb->s_vfs_rename_mutex);
227 full_path = build_path_from_dentry(direntry); 222 full_path = build_path_from_dentry(direntry);
228 mutex_unlock(&sb->s_vfs_rename_mutex);
229 if(full_path == NULL) { 223 if(full_path == NULL) {
230 FreeXid(xid); 224 FreeXid(xid);
231 return -ENOMEM; 225 return -ENOMEM;
@@ -341,9 +335,7 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size)
341 cifs_sb = CIFS_SB(sb); 335 cifs_sb = CIFS_SB(sb);
342 pTcon = cifs_sb->tcon; 336 pTcon = cifs_sb->tcon;
343 337
344 mutex_lock(&sb->s_vfs_rename_mutex);
345 full_path = build_path_from_dentry(direntry); 338 full_path = build_path_from_dentry(direntry);
346 mutex_unlock(&sb->s_vfs_rename_mutex);
347 if(full_path == NULL) { 339 if(full_path == NULL) {
348 FreeXid(xid); 340 FreeXid(xid);
349 return -ENOMEM; 341 return -ENOMEM;
diff --git a/fs/compat.c b/fs/compat.c
index 7f8e26ea427c..970888aad843 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1217 if (ret < 0) 1217 if (ret < 0)
1218 goto out; 1218 goto out;
1219 1219
1220 ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
1221 if (ret)
1222 goto out;
1223
1220 fnv = NULL; 1224 fnv = NULL;
1221 if (type == READ) { 1225 if (type == READ) {
1222 fn = file->f_op->read; 1226 fn = file->f_op->read;
@@ -1313,6 +1317,26 @@ out:
1313 return ret; 1317 return ret;
1314} 1318}
1315 1319
1320asmlinkage long
1321compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1322 unsigned int nr_segs, unsigned int flags)
1323{
1324 unsigned i;
1325 struct iovec *iov;
1326 if (nr_segs > UIO_MAXIOV)
1327 return -EINVAL;
1328 iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec));
1329 for (i = 0; i < nr_segs; i++) {
1330 struct compat_iovec v;
1331 if (get_user(v.iov_base, &iov32[i].iov_base) ||
1332 get_user(v.iov_len, &iov32[i].iov_len) ||
1333 put_user(compat_ptr(v.iov_base), &iov[i].iov_base) ||
1334 put_user(v.iov_len, &iov[i].iov_len))
1335 return -EFAULT;
1336 }
1337 return sys_vmsplice(fd, iov, nr_segs, flags);
1338}
1339
1316/* 1340/*
1317 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1341 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1318 * O_LARGEFILE flag. 1342 * O_LARGEFILE flag.
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 48ae0339af17..2edd7eec88fd 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -711,7 +711,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
711 * direct blocks blocks 711 * direct blocks blocks
712 */ 712 */
713 if (num == 0 && blks > 1) { 713 if (num == 0 && blks > 1) {
714 current_block = le32_to_cpu(where->key + 1); 714 current_block = le32_to_cpu(where->key) + 1;
715 for (i = 1; i < blks; i++) 715 for (i = 1; i < blks; i++)
716 *(where->p + i ) = cpu_to_le32(current_block++); 716 *(where->p + i ) = cpu_to_le32(current_block++);
717 } 717 }
@@ -724,7 +724,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
724 if (block_i) { 724 if (block_i) {
725 block_i->last_alloc_logical_block = block + blks - 1; 725 block_i->last_alloc_logical_block = block + blks - 1;
726 block_i->last_alloc_physical_block = 726 block_i->last_alloc_physical_block =
727 le32_to_cpu(where[num].key + blks - 1); 727 le32_to_cpu(where[num].key) + blks - 1;
728 } 728 }
729 729
730 /* We are done with atomic stuff, now do the rest of housekeeping */ 730 /* We are done with atomic stuff, now do the rest of housekeeping */
@@ -814,11 +814,13 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
814 814
815 /* Simplest case - block found, no allocation needed */ 815 /* Simplest case - block found, no allocation needed */
816 if (!partial) { 816 if (!partial) {
817 first_block = chain[depth - 1].key; 817 first_block = le32_to_cpu(chain[depth - 1].key);
818 clear_buffer_new(bh_result); 818 clear_buffer_new(bh_result);
819 count++; 819 count++;
820 /*map more blocks*/ 820 /*map more blocks*/
821 while (count < maxblocks && count <= blocks_to_boundary) { 821 while (count < maxblocks && count <= blocks_to_boundary) {
822 unsigned long blk;
823
822 if (!verify_chain(chain, partial)) { 824 if (!verify_chain(chain, partial)) {
823 /* 825 /*
824 * Indirect block might be removed by 826 * Indirect block might be removed by
@@ -831,8 +833,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
831 count = 0; 833 count = 0;
832 break; 834 break;
833 } 835 }
834 if (le32_to_cpu(*(chain[depth-1].p+count) == 836 blk = le32_to_cpu(*(chain[depth-1].p + count));
835 (first_block + count))) 837
838 if (blk == first_block + count)
836 count++; 839 count++;
837 else 840 else
838 break; 841 break;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index aaf1da17b6d4..8c22aa9a7fbb 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
48 if (!S_ISDIR(inode->i_mode)) 48 if (!S_ISDIR(inode->i_mode))
49 flags &= ~EXT3_DIRSYNC_FL; 49 flags &= ~EXT3_DIRSYNC_FL;
50 50
51 mutex_lock(&inode->i_mutex);
51 oldflags = ei->i_flags; 52 oldflags = ei->i_flags;
52 53
53 /* The JOURNAL_DATA flag is modifiable only by root */ 54 /* The JOURNAL_DATA flag is modifiable only by root */
@@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
60 * This test looks nicer. Thanks to Pauline Middelink 61 * This test looks nicer. Thanks to Pauline Middelink
61 */ 62 */
62 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 63 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
63 if (!capable(CAP_LINUX_IMMUTABLE)) 64 if (!capable(CAP_LINUX_IMMUTABLE)) {
65 mutex_unlock(&inode->i_mutex);
64 return -EPERM; 66 return -EPERM;
67 }
65 } 68 }
66 69
67 /* 70 /*
@@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
69 * the relevant capability. 72 * the relevant capability.
70 */ 73 */
71 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 74 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
72 if (!capable(CAP_SYS_RESOURCE)) 75 if (!capable(CAP_SYS_RESOURCE)) {
76 mutex_unlock(&inode->i_mutex);
73 return -EPERM; 77 return -EPERM;
78 }
74 } 79 }
75 80
76 81
77 handle = ext3_journal_start(inode, 1); 82 handle = ext3_journal_start(inode, 1);
78 if (IS_ERR(handle)) 83 if (IS_ERR(handle)) {
84 mutex_unlock(&inode->i_mutex);
79 return PTR_ERR(handle); 85 return PTR_ERR(handle);
86 }
80 if (IS_SYNC(inode)) 87 if (IS_SYNC(inode))
81 handle->h_sync = 1; 88 handle->h_sync = 1;
82 err = ext3_reserve_inode_write(handle, inode, &iloc); 89 err = ext3_reserve_inode_write(handle, inode, &iloc);
@@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
93 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 100 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
94flags_err: 101flags_err:
95 ext3_journal_stop(handle); 102 ext3_journal_stop(handle);
96 if (err) 103 if (err) {
104 mutex_unlock(&inode->i_mutex);
97 return err; 105 return err;
106 }
98 107
99 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 108 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
100 err = ext3_change_inode_journal_flag(inode, jflag); 109 err = ext3_change_inode_journal_flag(inode, jflag);
110 mutex_unlock(&inode->i_mutex);
101 return err; 111 return err;
102 } 112 }
103 case EXT3_IOC_GETVERSION: 113 case EXT3_IOC_GETVERSION:
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index c5ffa8523968..8aac5334680d 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb,
213 goto exit_bh; 213 goto exit_bh;
214 } 214 }
215 lock_buffer(bh); 215 lock_buffer(bh);
216 memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size); 216 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
217 set_buffer_uptodate(gdb); 217 set_buffer_uptodate(gdb);
218 unlock_buffer(bh); 218 unlock_buffer(bh);
219 ext3_journal_dirty_metadata(handle, gdb); 219 ext3_journal_dirty_metadata(handle, gdb);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cc750c68fe70..104a62dadb94 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -128,14 +128,24 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
128 } 128 }
129} 129}
130 130
131void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req) 131/*
132 * Called with sbput_sem held for read (request_end) or write
133 * (fuse_put_super). By the time fuse_put_super() is finished, all
134 * inodes belonging to background requests must be released, so the
135 * iputs have to be done within the locked region.
136 */
137void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
132{ 138{
133 list_del_init(&req->bg_entry); 139 iput(req->inode);
140 iput(req->inode2);
141 spin_lock(&fc->lock);
142 list_del(&req->bg_entry);
134 if (fc->num_background == FUSE_MAX_BACKGROUND) { 143 if (fc->num_background == FUSE_MAX_BACKGROUND) {
135 fc->blocked = 0; 144 fc->blocked = 0;
136 wake_up_all(&fc->blocked_waitq); 145 wake_up_all(&fc->blocked_waitq);
137 } 146 }
138 fc->num_background--; 147 fc->num_background--;
148 spin_unlock(&fc->lock);
139} 149}
140 150
141/* 151/*
@@ -165,27 +175,22 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
165 wake_up(&req->waitq); 175 wake_up(&req->waitq);
166 fuse_put_request(fc, req); 176 fuse_put_request(fc, req);
167 } else { 177 } else {
168 struct inode *inode = req->inode;
169 struct inode *inode2 = req->inode2;
170 struct file *file = req->file;
171 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 178 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
172 req->end = NULL; 179 req->end = NULL;
173 req->inode = NULL;
174 req->inode2 = NULL;
175 req->file = NULL;
176 if (!list_empty(&req->bg_entry))
177 fuse_remove_background(fc, req);
178 spin_unlock(&fc->lock); 180 spin_unlock(&fc->lock);
181 down_read(&fc->sbput_sem);
182 if (fc->mounted)
183 fuse_release_background(fc, req);
184 up_read(&fc->sbput_sem);
185
186 /* fput must go outside sbput_sem, otherwise it can deadlock */
187 if (req->file)
188 fput(req->file);
179 189
180 if (end) 190 if (end)
181 end(fc, req); 191 end(fc, req);
182 else 192 else
183 fuse_put_request(fc, req); 193 fuse_put_request(fc, req);
184
185 if (file)
186 fput(file);
187 iput(inode);
188 iput(inode2);
189 } 194 }
190} 195}
191 196
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 59661c481d9d..0474202cb5dc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -258,9 +258,15 @@ struct fuse_conn {
258 /** waitq for blocked connection */ 258 /** waitq for blocked connection */
259 wait_queue_head_t blocked_waitq; 259 wait_queue_head_t blocked_waitq;
260 260
261 /** RW semaphore for exclusion with fuse_put_super() */
262 struct rw_semaphore sbput_sem;
263
261 /** The next unique request id */ 264 /** The next unique request id */
262 u64 reqctr; 265 u64 reqctr;
263 266
267 /** Mount is active */
268 unsigned mounted;
269
264 /** Connection established, cleared on umount, connection 270 /** Connection established, cleared on umount, connection
265 abort and device release */ 271 abort and device release */
266 unsigned connected; 272 unsigned connected;
@@ -471,11 +477,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
471void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 477void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
472 478
473/** 479/**
474 * Remove request from the the background list 480 * Release inodes and file associated with background request
475 */ 481 */
476void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req); 482void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
477 483
478/** Abort all requests */ 484/* Abort all requests */
479void fuse_abort_conn(struct fuse_conn *fc); 485void fuse_abort_conn(struct fuse_conn *fc);
480 486
481/** 487/**
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 43a6fc0db8a7..7627022446b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -204,26 +204,17 @@ static void fuse_put_super(struct super_block *sb)
204{ 204{
205 struct fuse_conn *fc = get_fuse_conn_super(sb); 205 struct fuse_conn *fc = get_fuse_conn_super(sb);
206 206
207 down_write(&fc->sbput_sem);
208 while (!list_empty(&fc->background))
209 fuse_release_background(fc,
210 list_entry(fc->background.next,
211 struct fuse_req, bg_entry));
212
207 spin_lock(&fc->lock); 213 spin_lock(&fc->lock);
214 fc->mounted = 0;
208 fc->connected = 0; 215 fc->connected = 0;
209 while (!list_empty(&fc->background)) {
210 struct fuse_req *req = list_entry(fc->background.next,
211 struct fuse_req, bg_entry);
212 struct inode *inode = req->inode;
213 struct inode *inode2 = req->inode2;
214
215 /* File would hold a reference to vfsmount */
216 BUG_ON(req->file);
217 req->inode = NULL;
218 req->inode2 = NULL;
219 fuse_remove_background(fc, req);
220
221 spin_unlock(&fc->lock);
222 iput(inode);
223 iput(inode2);
224 spin_lock(&fc->lock);
225 }
226 spin_unlock(&fc->lock); 216 spin_unlock(&fc->lock);
217 up_write(&fc->sbput_sem);
227 /* Flush all readers on this fs */ 218 /* Flush all readers on this fs */
228 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 219 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
229 wake_up_all(&fc->waitq); 220 wake_up_all(&fc->waitq);
@@ -395,6 +386,7 @@ static struct fuse_conn *new_conn(void)
395 INIT_LIST_HEAD(&fc->processing); 386 INIT_LIST_HEAD(&fc->processing);
396 INIT_LIST_HEAD(&fc->io); 387 INIT_LIST_HEAD(&fc->io);
397 INIT_LIST_HEAD(&fc->background); 388 INIT_LIST_HEAD(&fc->background);
389 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys); 390 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj); 391 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0); 392 atomic_set(&fc->num_waiting, 0);
@@ -508,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
508 if (file->f_op != &fuse_dev_operations) 500 if (file->f_op != &fuse_dev_operations)
509 return -EINVAL; 501 return -EINVAL;
510 502
511 /* Setting file->private_data can't race with other mount()
512 instances, since BKL is held for ->get_sb() */
513 if (file->private_data)
514 return -EINVAL;
515
516 fc = new_conn(); 503 fc = new_conn();
517 if (!fc) 504 if (!fc)
518 return -ENOMEM; 505 return -ENOMEM;
@@ -548,7 +535,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
548 if (err) 535 if (err)
549 goto err_free_req; 536 goto err_free_req;
550 537
538 /* Setting file->private_data can't race with other mount()
539 instances, since BKL is held for ->get_sb() */
540 err = -EINVAL;
541 if (file->private_data)
542 goto err_kobject_del;
543
551 sb->s_root = root_dentry; 544 sb->s_root = root_dentry;
545 fc->mounted = 1;
552 fc->connected = 1; 546 fc->connected = 1;
553 kobject_get(&fc->kobj); 547 kobject_get(&fc->kobj);
554 file->private_data = fc; 548 file->private_data = fc;
@@ -563,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
563 557
564 return 0; 558 return 0;
565 559
560 err_kobject_del:
561 kobject_del(&fc->kobj);
566 err_free_req: 562 err_free_req:
567 fuse_request_free(init_req); 563 fuse_request_free(init_req);
568 err_put_root: 564 err_put_root:
diff --git a/fs/locks.c b/fs/locks.c
index efad798824dc..6f99c0a6f836 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -446,15 +446,14 @@ static struct lock_manager_operations lease_manager_ops = {
446 */ 446 */
447static int lease_init(struct file *filp, int type, struct file_lock *fl) 447static int lease_init(struct file *filp, int type, struct file_lock *fl)
448 { 448 {
449 if (assign_type(fl, type) != 0)
450 return -EINVAL;
451
449 fl->fl_owner = current->files; 452 fl->fl_owner = current->files;
450 fl->fl_pid = current->tgid; 453 fl->fl_pid = current->tgid;
451 454
452 fl->fl_file = filp; 455 fl->fl_file = filp;
453 fl->fl_flags = FL_LEASE; 456 fl->fl_flags = FL_LEASE;
454 if (assign_type(fl, type) != 0) {
455 locks_free_lock(fl);
456 return -EINVAL;
457 }
458 fl->fl_start = 0; 457 fl->fl_start = 0;
459 fl->fl_end = OFFSET_MAX; 458 fl->fl_end = OFFSET_MAX;
460 fl->fl_ops = NULL; 459 fl->fl_ops = NULL;
@@ -466,16 +465,19 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
466static int lease_alloc(struct file *filp, int type, struct file_lock **flp) 465static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
467{ 466{
468 struct file_lock *fl = locks_alloc_lock(); 467 struct file_lock *fl = locks_alloc_lock();
469 int error; 468 int error = -ENOMEM;
470 469
471 if (fl == NULL) 470 if (fl == NULL)
472 return -ENOMEM; 471 goto out;
473 472
474 error = lease_init(filp, type, fl); 473 error = lease_init(filp, type, fl);
475 if (error) 474 if (error) {
476 return error; 475 locks_free_lock(fl);
476 fl = NULL;
477 }
478out:
477 *flp = fl; 479 *flp = fl;
478 return 0; 480 return error;
479} 481}
480 482
481/* Check if two locks overlap each other. 483/* Check if two locks overlap each other.
@@ -1372,6 +1374,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
1372 goto out; 1374 goto out;
1373 1375
1374 if (my_before != NULL) { 1376 if (my_before != NULL) {
1377 *flp = *my_before;
1375 error = lease->fl_lmops->fl_change(my_before, arg); 1378 error = lease->fl_lmops->fl_change(my_before, arg);
1376 goto out; 1379 goto out;
1377 } 1380 }
diff --git a/fs/pipe.c b/fs/pipe.c
index 7fefb10db8d9..5acd8954aaa0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe)
55} 55}
56 56
57static int 57static int
58pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) 58pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
59 int atomic)
59{ 60{
60 unsigned long copy; 61 unsigned long copy;
61 62
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
64 iov++; 65 iov++;
65 copy = min_t(unsigned long, len, iov->iov_len); 66 copy = min_t(unsigned long, len, iov->iov_len);
66 67
67 if (copy_from_user(to, iov->iov_base, copy)) 68 if (atomic) {
68 return -EFAULT; 69 if (__copy_from_user_inatomic(to, iov->iov_base, copy))
70 return -EFAULT;
71 } else {
72 if (copy_from_user(to, iov->iov_base, copy))
73 return -EFAULT;
74 }
69 to += copy; 75 to += copy;
70 len -= copy; 76 len -= copy;
71 iov->iov_base += copy; 77 iov->iov_base += copy;
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
75} 81}
76 82
77static int 83static int
78pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) 84pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
85 int atomic)
79{ 86{
80 unsigned long copy; 87 unsigned long copy;
81 88
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
84 iov++; 91 iov++;
85 copy = min_t(unsigned long, len, iov->iov_len); 92 copy = min_t(unsigned long, len, iov->iov_len);
86 93
87 if (copy_to_user(iov->iov_base, from, copy)) 94 if (atomic) {
88 return -EFAULT; 95 if (__copy_to_user_inatomic(iov->iov_base, from, copy))
96 return -EFAULT;
97 } else {
98 if (copy_to_user(iov->iov_base, from, copy))
99 return -EFAULT;
100 }
89 from += copy; 101 from += copy;
90 len -= copy; 102 len -= copy;
91 iov->iov_base += copy; 103 iov->iov_base += copy;
@@ -94,13 +106,52 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
94 return 0; 106 return 0;
95} 107}
96 108
109/*
110 * Attempt to pre-fault in the user memory, so we can use atomic copies.
111 * Returns the number of bytes not faulted in.
112 */
113static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
114{
115 while (!iov->iov_len)
116 iov++;
117
118 while (len > 0) {
119 unsigned long this_len;
120
121 this_len = min_t(unsigned long, len, iov->iov_len);
122 if (fault_in_pages_writeable(iov->iov_base, this_len))
123 break;
124
125 len -= this_len;
126 iov++;
127 }
128
129 return len;
130}
131
132/*
133 * Pre-fault in the user memory, so we can use atomic copies.
134 */
135static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
136{
137 while (!iov->iov_len)
138 iov++;
139
140 while (len > 0) {
141 unsigned long this_len;
142
143 this_len = min_t(unsigned long, len, iov->iov_len);
144 fault_in_pages_readable(iov->iov_base, this_len);
145 len -= this_len;
146 iov++;
147 }
148}
149
97static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 150static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf) 151 struct pipe_buffer *buf)
99{ 152{
100 struct page *page = buf->page; 153 struct page *page = buf->page;
101 154
102 buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
103
104 /* 155 /*
105 * If nobody else uses this page, and we don't already have a 156 * If nobody else uses this page, and we don't already have a
106 * temporary page, let's keep track of it as a one-deep 157 * temporary page, let's keep track of it as a one-deep
@@ -112,38 +163,58 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
112 page_cache_release(page); 163 page_cache_release(page);
113} 164}
114 165
115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe, 166void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf) 167 struct pipe_buffer *buf, int atomic)
117{ 168{
169 if (atomic) {
170 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
171 return kmap_atomic(buf->page, KM_USER0);
172 }
173
118 return kmap(buf->page); 174 return kmap(buf->page);
119} 175}
120 176
121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, 177void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf) 178 struct pipe_buffer *buf, void *map_data)
123{ 179{
124 kunmap(buf->page); 180 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
181 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
182 kunmap_atomic(map_data, KM_USER0);
183 } else
184 kunmap(buf->page);
125} 185}
126 186
127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, 187int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
128 struct pipe_buffer *buf) 188 struct pipe_buffer *buf)
129{ 189{
130 buf->flags |= PIPE_BUF_FLAG_STOLEN; 190 struct page *page = buf->page;
131 return 0; 191
192 if (page_count(page) == 1) {
193 lock_page(page);
194 return 0;
195 }
196
197 return 1;
132} 198}
133 199
134static void anon_pipe_buf_get(struct pipe_inode_info *info, 200void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
135 struct pipe_buffer *buf)
136{ 201{
137 page_cache_get(buf->page); 202 page_cache_get(buf->page);
138} 203}
139 204
205int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
206{
207 return 0;
208}
209
140static struct pipe_buf_operations anon_pipe_buf_ops = { 210static struct pipe_buf_operations anon_pipe_buf_ops = {
141 .can_merge = 1, 211 .can_merge = 1,
142 .map = anon_pipe_buf_map, 212 .map = generic_pipe_buf_map,
143 .unmap = anon_pipe_buf_unmap, 213 .unmap = generic_pipe_buf_unmap,
214 .pin = generic_pipe_buf_pin,
144 .release = anon_pipe_buf_release, 215 .release = anon_pipe_buf_release,
145 .steal = anon_pipe_buf_steal, 216 .steal = generic_pipe_buf_steal,
146 .get = anon_pipe_buf_get, 217 .get = generic_pipe_buf_get,
147}; 218};
148 219
149static ssize_t 220static ssize_t
@@ -174,22 +245,33 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
174 struct pipe_buf_operations *ops = buf->ops; 245 struct pipe_buf_operations *ops = buf->ops;
175 void *addr; 246 void *addr;
176 size_t chars = buf->len; 247 size_t chars = buf->len;
177 int error; 248 int error, atomic;
178 249
179 if (chars > total_len) 250 if (chars > total_len)
180 chars = total_len; 251 chars = total_len;
181 252
182 addr = ops->map(filp, pipe, buf); 253 error = ops->pin(pipe, buf);
183 if (IS_ERR(addr)) { 254 if (error) {
184 if (!ret) 255 if (!ret)
185 ret = PTR_ERR(addr); 256 error = ret;
186 break; 257 break;
187 } 258 }
188 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 259
189 ops->unmap(pipe, buf); 260 atomic = !iov_fault_in_pages_write(iov, chars);
261redo:
262 addr = ops->map(pipe, buf, atomic);
263 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
264 ops->unmap(pipe, buf, addr);
190 if (unlikely(error)) { 265 if (unlikely(error)) {
266 /*
267 * Just retry with the slow path if we failed.
268 */
269 if (atomic) {
270 atomic = 0;
271 goto redo;
272 }
191 if (!ret) 273 if (!ret)
192 ret = -EFAULT; 274 ret = error;
193 break; 275 break;
194 } 276 }
195 ret += chars; 277 ret += chars;
@@ -293,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
293 int offset = buf->offset + buf->len; 375 int offset = buf->offset + buf->len;
294 376
295 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 377 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
378 int error, atomic = 1;
296 void *addr; 379 void *addr;
297 int error;
298 380
299 addr = ops->map(filp, pipe, buf); 381 error = ops->pin(pipe, buf);
300 if (IS_ERR(addr)) { 382 if (error)
301 error = PTR_ERR(addr);
302 goto out; 383 goto out;
303 } 384
385 iov_fault_in_pages_read(iov, chars);
386redo1:
387 addr = ops->map(pipe, buf, atomic);
304 error = pipe_iov_copy_from_user(offset + addr, iov, 388 error = pipe_iov_copy_from_user(offset + addr, iov,
305 chars); 389 chars, atomic);
306 ops->unmap(pipe, buf); 390 ops->unmap(pipe, buf, addr);
307 ret = error; 391 ret = error;
308 do_wakeup = 1; 392 do_wakeup = 1;
309 if (error) 393 if (error) {
394 if (atomic) {
395 atomic = 0;
396 goto redo1;
397 }
310 goto out; 398 goto out;
399 }
311 buf->len += chars; 400 buf->len += chars;
312 total_len -= chars; 401 total_len -= chars;
313 ret = chars; 402 ret = chars;
@@ -330,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
330 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 419 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
331 struct pipe_buffer *buf = pipe->bufs + newbuf; 420 struct pipe_buffer *buf = pipe->bufs + newbuf;
332 struct page *page = pipe->tmp_page; 421 struct page *page = pipe->tmp_page;
333 int error; 422 char *src;
423 int error, atomic = 1;
334 424
335 if (!page) { 425 if (!page) {
336 page = alloc_page(GFP_HIGHUSER); 426 page = alloc_page(GFP_HIGHUSER);
@@ -350,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
350 if (chars > total_len) 440 if (chars > total_len)
351 chars = total_len; 441 chars = total_len;
352 442
353 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 443 iov_fault_in_pages_read(iov, chars);
354 kunmap(page); 444redo2:
445 if (atomic)
446 src = kmap_atomic(page, KM_USER0);
447 else
448 src = kmap(page);
449
450 error = pipe_iov_copy_from_user(src, iov, chars,
451 atomic);
452 if (atomic)
453 kunmap_atomic(src, KM_USER0);
454 else
455 kunmap(page);
456
355 if (unlikely(error)) { 457 if (unlikely(error)) {
458 if (atomic) {
459 atomic = 0;
460 goto redo2;
461 }
356 if (!ret) 462 if (!ret)
357 ret = -EFAULT; 463 ret = error;
358 break; 464 break;
359 } 465 }
360 ret += chars; 466 ret += chars;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 58c418fbca2c..97ae1b92bc47 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct inode *inode)
408 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); 408 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
409 reiserfs_read_unlock_xattrs(inode->i_sb); 409 reiserfs_read_unlock_xattrs(inode->i_sb);
410 reiserfs_read_unlock_xattr_i(inode); 410 reiserfs_read_unlock_xattr_i(inode);
411 ret = acl ? 1 : 0; 411 ret = (acl && !IS_ERR(acl));
412 posix_acl_release(acl); 412 if (ret)
413 posix_acl_release(acl);
413 } 414 }
414 415
415 return ret; 416 return ret;
diff --git a/fs/splice.c b/fs/splice.c
index 22fac87e90b3..a285fd746dc0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,15 +27,22 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h>
31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
30 36
31/* 37/*
32 * Passed to the actors 38 * Passed to splice_to_pipe
33 */ 39 */
34struct splice_desc { 40struct splice_pipe_desc {
35 unsigned int len, total_len; /* current and remaining length */ 41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
36 unsigned int flags; /* splice flags */ 44 unsigned int flags; /* splice flags */
37 struct file *file; /* file to read/write */ 45 struct pipe_buf_operations *ops;/* ops associated with output pipe */
38 loff_t pos; /* file position */
39}; 46};
40 47
41/* 48/*
@@ -44,7 +51,7 @@ struct splice_desc {
44 * addition of remove_mapping(). If success is returned, the caller may 51 * addition of remove_mapping(). If success is returned, the caller may
45 * attempt to reuse this page for another destination. 52 * attempt to reuse this page for another destination.
46 */ 53 */
47static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 54static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
48 struct pipe_buffer *buf) 55 struct pipe_buffer *buf)
49{ 56{
50 struct page *page = buf->page; 57 struct page *page = buf->page;
@@ -71,21 +78,19 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
71 return 1; 78 return 1;
72 } 79 }
73 80
74 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 81 buf->flags |= PIPE_BUF_FLAG_LRU;
75 return 0; 82 return 0;
76} 83}
77 84
78static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 85static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
79 struct pipe_buffer *buf) 86 struct pipe_buffer *buf)
80{ 87{
81 page_cache_release(buf->page); 88 page_cache_release(buf->page);
82 buf->page = NULL; 89 buf->flags &= ~PIPE_BUF_FLAG_LRU;
83 buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
84} 90}
85 91
86static void *page_cache_pipe_buf_map(struct file *file, 92static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
87 struct pipe_inode_info *info, 93 struct pipe_buffer *buf)
88 struct pipe_buffer *buf)
89{ 94{
90 struct page *page = buf->page; 95 struct page *page = buf->page;
91 int err; 96 int err;
@@ -111,51 +116,59 @@ static void *page_cache_pipe_buf_map(struct file *file,
111 } 116 }
112 117
113 /* 118 /*
114 * Page is ok afterall, fall through to mapping. 119 * Page is ok afterall, we are done.
115 */ 120 */
116 unlock_page(page); 121 unlock_page(page);
117 } 122 }
118 123
119 return kmap(page); 124 return 0;
120error: 125error:
121 unlock_page(page); 126 unlock_page(page);
122 return ERR_PTR(err); 127 return err;
123} 128}
124 129
125static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 130static struct pipe_buf_operations page_cache_pipe_buf_ops = {
126 struct pipe_buffer *buf) 131 .can_merge = 0,
127{ 132 .map = generic_pipe_buf_map,
128 kunmap(buf->page); 133 .unmap = generic_pipe_buf_unmap,
129} 134 .pin = page_cache_pipe_buf_pin,
135 .release = page_cache_pipe_buf_release,
136 .steal = page_cache_pipe_buf_steal,
137 .get = generic_pipe_buf_get,
138};
130 139
131static void page_cache_pipe_buf_get(struct pipe_inode_info *info, 140static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
132 struct pipe_buffer *buf) 141 struct pipe_buffer *buf)
133{ 142{
134 page_cache_get(buf->page); 143 if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
144 return 1;
145
146 buf->flags |= PIPE_BUF_FLAG_LRU;
147 return generic_pipe_buf_steal(pipe, buf);
135} 148}
136 149
137static struct pipe_buf_operations page_cache_pipe_buf_ops = { 150static struct pipe_buf_operations user_page_pipe_buf_ops = {
138 .can_merge = 0, 151 .can_merge = 0,
139 .map = page_cache_pipe_buf_map, 152 .map = generic_pipe_buf_map,
140 .unmap = page_cache_pipe_buf_unmap, 153 .unmap = generic_pipe_buf_unmap,
154 .pin = generic_pipe_buf_pin,
141 .release = page_cache_pipe_buf_release, 155 .release = page_cache_pipe_buf_release,
142 .steal = page_cache_pipe_buf_steal, 156 .steal = user_page_pipe_buf_steal,
143 .get = page_cache_pipe_buf_get, 157 .get = generic_pipe_buf_get,
144}; 158};
145 159
146/* 160/*
147 * Pipe output worker. This sets up our pipe format with the page cache 161 * Pipe output worker. This sets up our pipe format with the page cache
148 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 162 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
149 */ 163 */
150static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 164static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
151 int nr_pages, unsigned long len, 165 struct splice_pipe_desc *spd)
152 unsigned int offset, unsigned int flags)
153{ 166{
154 int ret, do_wakeup, i; 167 int ret, do_wakeup, page_nr;
155 168
156 ret = 0; 169 ret = 0;
157 do_wakeup = 0; 170 do_wakeup = 0;
158 i = 0; 171 page_nr = 0;
159 172
160 if (pipe->inode) 173 if (pipe->inode)
161 mutex_lock(&pipe->inode->i_mutex); 174 mutex_lock(&pipe->inode->i_mutex);
@@ -171,27 +184,22 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
171 if (pipe->nrbufs < PIPE_BUFFERS) { 184 if (pipe->nrbufs < PIPE_BUFFERS) {
172 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 185 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
173 struct pipe_buffer *buf = pipe->bufs + newbuf; 186 struct pipe_buffer *buf = pipe->bufs + newbuf;
174 struct page *page = pages[i++];
175 unsigned long this_len;
176 187
177 this_len = PAGE_CACHE_SIZE - offset; 188 buf->page = spd->pages[page_nr];
178 if (this_len > len) 189 buf->offset = spd->partial[page_nr].offset;
179 this_len = len; 190 buf->len = spd->partial[page_nr].len;
191 buf->ops = spd->ops;
192 if (spd->flags & SPLICE_F_GIFT)
193 buf->flags |= PIPE_BUF_FLAG_GIFT;
180 194
181 buf->page = page;
182 buf->offset = offset;
183 buf->len = this_len;
184 buf->ops = &page_cache_pipe_buf_ops;
185 pipe->nrbufs++; 195 pipe->nrbufs++;
196 page_nr++;
197 ret += buf->len;
198
186 if (pipe->inode) 199 if (pipe->inode)
187 do_wakeup = 1; 200 do_wakeup = 1;
188 201
189 ret += this_len; 202 if (!--spd->nr_pages)
190 len -= this_len;
191 offset = 0;
192 if (!--nr_pages)
193 break;
194 if (!len)
195 break; 203 break;
196 if (pipe->nrbufs < PIPE_BUFFERS) 204 if (pipe->nrbufs < PIPE_BUFFERS)
197 continue; 205 continue;
@@ -199,7 +207,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
199 break; 207 break;
200 } 208 }
201 209
202 if (flags & SPLICE_F_NONBLOCK) { 210 if (spd->flags & SPLICE_F_NONBLOCK) {
203 if (!ret) 211 if (!ret)
204 ret = -EAGAIN; 212 ret = -EAGAIN;
205 break; 213 break;
@@ -234,8 +242,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
234 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 242 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
235 } 243 }
236 244
237 while (i < nr_pages) 245 while (page_nr < spd->nr_pages)
238 page_cache_release(pages[i++]); 246 page_cache_release(spd->pages[page_nr++]);
239 247
240 return ret; 248 return ret;
241} 249}
@@ -246,17 +254,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
246 unsigned int flags) 254 unsigned int flags)
247{ 255{
248 struct address_space *mapping = in->f_mapping; 256 struct address_space *mapping = in->f_mapping;
249 unsigned int loff, offset, nr_pages; 257 unsigned int loff, nr_pages;
250 struct page *pages[PIPE_BUFFERS]; 258 struct page *pages[PIPE_BUFFERS];
259 struct partial_page partial[PIPE_BUFFERS];
251 struct page *page; 260 struct page *page;
252 pgoff_t index, end_index; 261 pgoff_t index, end_index;
253 loff_t isize; 262 loff_t isize;
254 size_t bytes; 263 size_t total_len;
255 int i, error; 264 int error, page_nr;
265 struct splice_pipe_desc spd = {
266 .pages = pages,
267 .partial = partial,
268 .flags = flags,
269 .ops = &page_cache_pipe_buf_ops,
270 };
256 271
257 index = *ppos >> PAGE_CACHE_SHIFT; 272 index = *ppos >> PAGE_CACHE_SHIFT;
258 loff = offset = *ppos & ~PAGE_CACHE_MASK; 273 loff = *ppos & ~PAGE_CACHE_MASK;
259 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 274 nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
260 275
261 if (nr_pages > PIPE_BUFFERS) 276 if (nr_pages > PIPE_BUFFERS)
262 nr_pages = PIPE_BUFFERS; 277 nr_pages = PIPE_BUFFERS;
@@ -266,38 +281,83 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
266 * read-ahead if this is a non-zero offset (we are likely doing small 281 * read-ahead if this is a non-zero offset (we are likely doing small
267 * chunk splice and the page is already there) for a single page. 282 * chunk splice and the page is already there) for a single page.
268 */ 283 */
269 if (!offset || nr_pages > 1) 284 if (!loff || nr_pages > 1)
270 do_page_cache_readahead(mapping, in, index, nr_pages); 285 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
271 286
272 /* 287 /*
273 * Now fill in the holes: 288 * Now fill in the holes:
274 */ 289 */
275 error = 0; 290 error = 0;
276 bytes = 0; 291 total_len = 0;
277 for (i = 0; i < nr_pages; i++, index++) { 292
278find_page: 293 /*
294 * Lookup the (hopefully) full range of pages we need.
295 */
296 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
297
298 /*
299 * If find_get_pages_contig() returned fewer pages than we needed,
300 * allocate the rest.
301 */
302 index += spd.nr_pages;
303 while (spd.nr_pages < nr_pages) {
279 /* 304 /*
280 * lookup the page for this index 305 * Page could be there, find_get_pages_contig() breaks on
306 * the first hole.
281 */ 307 */
282 page = find_get_page(mapping, index); 308 page = find_get_page(mapping, index);
283 if (!page) { 309 if (!page) {
284 /* 310 /*
285 * page didn't exist, allocate one 311 * Make sure the read-ahead engine is notified
312 * about this failure.
313 */
314 handle_ra_miss(mapping, &in->f_ra, index);
315
316 /*
317 * page didn't exist, allocate one.
286 */ 318 */
287 page = page_cache_alloc_cold(mapping); 319 page = page_cache_alloc_cold(mapping);
288 if (!page) 320 if (!page)
289 break; 321 break;
290 322
291 error = add_to_page_cache_lru(page, mapping, index, 323 error = add_to_page_cache_lru(page, mapping, index,
292 mapping_gfp_mask(mapping)); 324 mapping_gfp_mask(mapping));
293 if (unlikely(error)) { 325 if (unlikely(error)) {
294 page_cache_release(page); 326 page_cache_release(page);
327 if (error == -EEXIST)
328 continue;
295 break; 329 break;
296 } 330 }
297 331 /*
298 goto readpage; 332 * add_to_page_cache() locks the page, unlock it
333 * to avoid convoluting the logic below even more.
334 */
335 unlock_page(page);
299 } 336 }
300 337
338 pages[spd.nr_pages++] = page;
339 index++;
340 }
341
342 /*
343 * Now loop over the map and see if we need to start IO on any
344 * pages, fill in the partial map, etc.
345 */
346 index = *ppos >> PAGE_CACHE_SHIFT;
347 nr_pages = spd.nr_pages;
348 spd.nr_pages = 0;
349 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
350 unsigned int this_len;
351
352 if (!len)
353 break;
354
355 /*
356 * this_len is the max we'll use from this page
357 */
358 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
359 page = pages[page_nr];
360
301 /* 361 /*
302 * If the page isn't uptodate, we may need to start io on it 362 * If the page isn't uptodate, we may need to start io on it
303 */ 363 */
@@ -318,7 +378,6 @@ find_page:
318 */ 378 */
319 if (!page->mapping) { 379 if (!page->mapping) {
320 unlock_page(page); 380 unlock_page(page);
321 page_cache_release(page);
322 break; 381 break;
323 } 382 }
324 /* 383 /*
@@ -329,16 +388,20 @@ find_page:
329 goto fill_it; 388 goto fill_it;
330 } 389 }
331 390
332readpage:
333 /* 391 /*
334 * need to read in the page 392 * need to read in the page
335 */ 393 */
336 error = mapping->a_ops->readpage(in, page); 394 error = mapping->a_ops->readpage(in, page);
337
338 if (unlikely(error)) { 395 if (unlikely(error)) {
339 page_cache_release(page); 396 /*
397 * We really should re-lookup the page here,
398 * but it complicates things a lot. Instead
399 * lets just do what we already stored, and
400 * we'll get it the next time we are called.
401 */
340 if (error == AOP_TRUNCATED_PAGE) 402 if (error == AOP_TRUNCATED_PAGE)
341 goto find_page; 403 error = 0;
404
342 break; 405 break;
343 } 406 }
344 407
@@ -347,10 +410,8 @@ readpage:
347 */ 410 */
348 isize = i_size_read(mapping->host); 411 isize = i_size_read(mapping->host);
349 end_index = (isize - 1) >> PAGE_CACHE_SHIFT; 412 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
350 if (unlikely(!isize || index > end_index)) { 413 if (unlikely(!isize || index > end_index))
351 page_cache_release(page);
352 break; 414 break;
353 }
354 415
355 /* 416 /*
356 * if this is the last page, see if we need to shrink 417 * if this is the last page, see if we need to shrink
@@ -358,24 +419,35 @@ readpage:
358 */ 419 */
359 if (end_index == index) { 420 if (end_index == index) {
360 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); 421 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
361 if (bytes + loff > isize) { 422 if (total_len + loff > isize)
362 page_cache_release(page);
363 break; 423 break;
364 }
365 /* 424 /*
366 * force quit after adding this page 425 * force quit after adding this page
367 */ 426 */
368 nr_pages = i; 427 len = this_len;
428 this_len = min(this_len, loff);
429 loff = 0;
369 } 430 }
370 } 431 }
371fill_it: 432fill_it:
372 pages[i] = page; 433 partial[page_nr].offset = loff;
373 bytes += PAGE_CACHE_SIZE - loff; 434 partial[page_nr].len = this_len;
435 len -= this_len;
436 total_len += this_len;
374 loff = 0; 437 loff = 0;
438 spd.nr_pages++;
439 index++;
375 } 440 }
376 441
377 if (i) 442 /*
378 return move_to_pipe(pipe, pages, i, bytes, offset, flags); 443 * Release any pages at the end, if we quit early. 'i' is how far
444 * we got, 'nr_pages' is how many pages are in the map.
445 */
446 while (page_nr < nr_pages)
447 page_cache_release(pages[page_nr++]);
448
449 if (spd.nr_pages)
450 return splice_to_pipe(pipe, &spd);
379 451
380 return error; 452 return error;
381} 453}
@@ -428,38 +500,24 @@ EXPORT_SYMBOL(generic_file_splice_read);
428 500
429/* 501/*
430 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 502 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
431 * using sendpage(). 503 * using sendpage(). Return the number of bytes sent.
432 */ 504 */
433static int pipe_to_sendpage(struct pipe_inode_info *info, 505static int pipe_to_sendpage(struct pipe_inode_info *pipe,
434 struct pipe_buffer *buf, struct splice_desc *sd) 506 struct pipe_buffer *buf, struct splice_desc *sd)
435{ 507{
436 struct file *file = sd->file; 508 struct file *file = sd->file;
437 loff_t pos = sd->pos; 509 loff_t pos = sd->pos;
438 unsigned int offset; 510 int ret, more;
439 ssize_t ret;
440 void *ptr;
441 int more;
442 511
443 /* 512 ret = buf->ops->pin(pipe, buf);
444 * Sub-optimal, but we are limited by the pipe ->map. We don't 513 if (!ret) {
445 * need a kmap'ed buffer here, we just want to make sure we 514 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
446 * have the page pinned if the pipe page originates from the
447 * page cache.
448 */
449 ptr = buf->ops->map(file, info, buf);
450 if (IS_ERR(ptr))
451 return PTR_ERR(ptr);
452
453 offset = pos & ~PAGE_CACHE_MASK;
454 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
455 515
456 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 516 ret = file->f_op->sendpage(file, buf->page, buf->offset,
457 517 sd->len, &pos, more);
458 buf->ops->unmap(info, buf); 518 }
459 if (ret == sd->len)
460 return 0;
461 519
462 return -EIO; 520 return ret;
463} 521}
464 522
465/* 523/*
@@ -482,43 +540,51 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
482 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 540 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
483 * a new page in the output file page cache and fill/dirty that. 541 * a new page in the output file page cache and fill/dirty that.
484 */ 542 */
485static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 543static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
486 struct splice_desc *sd) 544 struct splice_desc *sd)
487{ 545{
488 struct file *file = sd->file; 546 struct file *file = sd->file;
489 struct address_space *mapping = file->f_mapping; 547 struct address_space *mapping = file->f_mapping;
490 gfp_t gfp_mask = mapping_gfp_mask(mapping); 548 gfp_t gfp_mask = mapping_gfp_mask(mapping);
491 unsigned int offset; 549 unsigned int offset, this_len;
492 struct page *page; 550 struct page *page;
493 pgoff_t index; 551 pgoff_t index;
494 char *src;
495 int ret; 552 int ret;
496 553
497 /* 554 /*
498 * make sure the data in this buffer is uptodate 555 * make sure the data in this buffer is uptodate
499 */ 556 */
500 src = buf->ops->map(file, info, buf); 557 ret = buf->ops->pin(pipe, buf);
501 if (IS_ERR(src)) 558 if (unlikely(ret))
502 return PTR_ERR(src); 559 return ret;
503 560
504 index = sd->pos >> PAGE_CACHE_SHIFT; 561 index = sd->pos >> PAGE_CACHE_SHIFT;
505 offset = sd->pos & ~PAGE_CACHE_MASK; 562 offset = sd->pos & ~PAGE_CACHE_MASK;
506 563
564 this_len = sd->len;
565 if (this_len + offset > PAGE_CACHE_SIZE)
566 this_len = PAGE_CACHE_SIZE - offset;
567
507 /* 568 /*
508 * Reuse buf page, if SPLICE_F_MOVE is set. 569 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
570 * page.
509 */ 571 */
510 if (sd->flags & SPLICE_F_MOVE) { 572 if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
511 /* 573 /*
512 * If steal succeeds, buf->page is now pruned from the vm 574 * If steal succeeds, buf->page is now pruned from the
513 * side (LRU and page cache) and we can reuse it. The page 575 * pagecache and we can reuse it. The page will also be
514 * will also be looked on successful return. 576 * locked on successful return.
515 */ 577 */
516 if (buf->ops->steal(info, buf)) 578 if (buf->ops->steal(pipe, buf))
517 goto find_page; 579 goto find_page;
518 580
519 page = buf->page; 581 page = buf->page;
520 if (add_to_page_cache(page, mapping, index, gfp_mask)) 582 if (add_to_page_cache(page, mapping, index, gfp_mask)) {
583 unlock_page(page);
521 goto find_page; 584 goto find_page;
585 }
586
587 page_cache_get(page);
522 588
523 if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 589 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
524 lru_cache_add(page); 590 lru_cache_add(page);
@@ -547,7 +613,7 @@ find_page:
547 * the full page. 613 * the full page.
548 */ 614 */
549 if (!PageUptodate(page)) { 615 if (!PageUptodate(page)) {
550 if (sd->len < PAGE_CACHE_SIZE) { 616 if (this_len < PAGE_CACHE_SIZE) {
551 ret = mapping->a_ops->readpage(file, page); 617 ret = mapping->a_ops->readpage(file, page);
552 if (unlikely(ret)) 618 if (unlikely(ret))
553 goto out; 619 goto out;
@@ -571,51 +637,67 @@ find_page:
571 } 637 }
572 } 638 }
573 639
574 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 640 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
575 if (ret == AOP_TRUNCATED_PAGE) { 641 if (unlikely(ret)) {
642 loff_t isize = i_size_read(mapping->host);
643
644 if (ret != AOP_TRUNCATED_PAGE)
645 unlock_page(page);
576 page_cache_release(page); 646 page_cache_release(page);
577 goto find_page; 647 if (ret == AOP_TRUNCATED_PAGE)
578 } else if (ret) 648 goto find_page;
649
650 /*
651 * prepare_write() may have instantiated a few blocks
652 * outside i_size. Trim these off again.
653 */
654 if (sd->pos + this_len > isize)
655 vmtruncate(mapping->host, isize);
656
579 goto out; 657 goto out;
658 }
580 659
581 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 660 if (buf->page != page) {
582 char *dst = kmap_atomic(page, KM_USER0); 661 /*
662 * Careful, ->map() uses KM_USER0!
663 */
664 char *src = buf->ops->map(pipe, buf, 1);
665 char *dst = kmap_atomic(page, KM_USER1);
583 666
584 memcpy(dst + offset, src + buf->offset, sd->len); 667 memcpy(dst + offset, src + buf->offset, this_len);
585 flush_dcache_page(page); 668 flush_dcache_page(page);
586 kunmap_atomic(dst, KM_USER0); 669 kunmap_atomic(dst, KM_USER1);
670 buf->ops->unmap(pipe, buf, src);
587 } 671 }
588 672
589 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 673 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
590 if (ret == AOP_TRUNCATED_PAGE) { 674 if (!ret) {
675 /*
676 * Return the number of bytes written and mark page as
677 * accessed, we are now done!
678 */
679 ret = this_len;
680 mark_page_accessed(page);
681 balance_dirty_pages_ratelimited(mapping);
682 } else if (ret == AOP_TRUNCATED_PAGE) {
591 page_cache_release(page); 683 page_cache_release(page);
592 goto find_page; 684 goto find_page;
593 } else if (ret) 685 }
594 goto out;
595
596 mark_page_accessed(page);
597 balance_dirty_pages_ratelimited(mapping);
598out: 686out:
599 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) 687 page_cache_release(page);
600 page_cache_release(page);
601
602 unlock_page(page); 688 unlock_page(page);
603out_nomem: 689out_nomem:
604 buf->ops->unmap(info, buf);
605 return ret; 690 return ret;
606} 691}
607 692
608typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
609 struct splice_desc *);
610
611/* 693/*
612 * Pipe input worker. Most of this logic works like a regular pipe, the 694 * Pipe input worker. Most of this logic works like a regular pipe, the
613 * key here is the 'actor' worker passed in that actually moves the data 695 * key here is the 'actor' worker passed in that actually moves the data
614 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 696 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
615 */ 697 */
616static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 698ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
617 loff_t *ppos, size_t len, unsigned int flags, 699 loff_t *ppos, size_t len, unsigned int flags,
618 splice_actor *actor) 700 splice_actor *actor)
619{ 701{
620 int ret, do_wakeup, err; 702 int ret, do_wakeup, err;
621 struct splice_desc sd; 703 struct splice_desc sd;
@@ -641,16 +723,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
641 sd.len = sd.total_len; 723 sd.len = sd.total_len;
642 724
643 err = actor(pipe, buf, &sd); 725 err = actor(pipe, buf, &sd);
644 if (err) { 726 if (err <= 0) {
645 if (!ret && err != -ENODATA) 727 if (!ret && err != -ENODATA)
646 ret = err; 728 ret = err;
647 729
648 break; 730 break;
649 } 731 }
650 732
651 ret += sd.len; 733 ret += err;
652 buf->offset += sd.len; 734 buf->offset += err;
653 buf->len -= sd.len; 735 buf->len -= err;
736
737 sd.len -= err;
738 sd.pos += err;
739 sd.total_len -= err;
740 if (sd.len)
741 continue;
654 742
655 if (!buf->len) { 743 if (!buf->len) {
656 buf->ops = NULL; 744 buf->ops = NULL;
@@ -661,8 +749,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
661 do_wakeup = 1; 749 do_wakeup = 1;
662 } 750 }
663 751
664 sd.pos += sd.len;
665 sd.total_len -= sd.len;
666 if (!sd.total_len) 752 if (!sd.total_len)
667 break; 753 break;
668 } 754 }
@@ -730,7 +816,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
730 struct address_space *mapping = out->f_mapping; 816 struct address_space *mapping = out->f_mapping;
731 ssize_t ret; 817 ssize_t ret;
732 818
733 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 819 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
734 if (ret > 0) { 820 if (ret > 0) {
735 struct inode *inode = mapping->host; 821 struct inode *inode = mapping->host;
736 822
@@ -772,7 +858,7 @@ EXPORT_SYMBOL(generic_file_splice_write);
772ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 858ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
773 loff_t *ppos, size_t len, unsigned int flags) 859 loff_t *ppos, size_t len, unsigned int flags)
774{ 860{
775 return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 861 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
776} 862}
777 863
778EXPORT_SYMBOL(generic_splice_sendpage); 864EXPORT_SYMBOL(generic_splice_sendpage);
@@ -859,7 +945,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
859 945
860 /* 946 /*
861 * We don't have an immediate reader, but we'll read the stuff 947 * We don't have an immediate reader, but we'll read the stuff
862 * out of the pipe right after the move_to_pipe(). So set 948 * out of the pipe right after the splice_to_pipe(). So set
863 * PIPE_READERS appropriately. 949 * PIPE_READERS appropriately.
864 */ 950 */
865 pipe->readers = 1; 951 pipe->readers = 1;
@@ -999,6 +1085,184 @@ static long do_splice(struct file *in, loff_t __user *off_in,
999 return -EINVAL; 1085 return -EINVAL;
1000} 1086}
1001 1087
1088/*
1089 * Map an iov into an array of pages and offset/length tuples. With the
1090 * partial_page structure, we can map several non-contiguous ranges into
1091 * our one pages[] map instead of splitting that operation into pieces.
1092 * Could easily be exported as a generic helper for other users, in which
1093 * case one would probably want to add a 'max_nr_pages' parameter as well.
1094 */
1095static int get_iovec_page_array(const struct iovec __user *iov,
1096 unsigned int nr_vecs, struct page **pages,
1097 struct partial_page *partial, int aligned)
1098{
1099 int buffers = 0, error = 0;
1100
1101 /*
1102 * It's ok to take the mmap_sem for reading, even
1103 * across a "get_user()".
1104 */
1105 down_read(&current->mm->mmap_sem);
1106
1107 while (nr_vecs) {
1108 unsigned long off, npages;
1109 void __user *base;
1110 size_t len;
1111 int i;
1112
1113 /*
1114 * Get user address base and length for this iovec.
1115 */
1116 error = get_user(base, &iov->iov_base);
1117 if (unlikely(error))
1118 break;
1119 error = get_user(len, &iov->iov_len);
1120 if (unlikely(error))
1121 break;
1122
1123 /*
1124 * Sanity check this iovec. 0 read succeeds.
1125 */
1126 if (unlikely(!len))
1127 break;
1128 error = -EFAULT;
1129 if (unlikely(!base))
1130 break;
1131
1132 /*
1133 * Get this base offset and number of pages, then map
1134 * in the user pages.
1135 */
1136 off = (unsigned long) base & ~PAGE_MASK;
1137
1138 /*
1139 * If asked for alignment, the offset must be zero and the
1140 * length a multiple of the PAGE_SIZE.
1141 */
1142 error = -EINVAL;
1143 if (aligned && (off || len & ~PAGE_MASK))
1144 break;
1145
1146 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1147 if (npages > PIPE_BUFFERS - buffers)
1148 npages = PIPE_BUFFERS - buffers;
1149
1150 error = get_user_pages(current, current->mm,
1151 (unsigned long) base, npages, 0, 0,
1152 &pages[buffers], NULL);
1153
1154 if (unlikely(error <= 0))
1155 break;
1156
1157 /*
1158 * Fill this contiguous range into the partial page map.
1159 */
1160 for (i = 0; i < error; i++) {
1161 const int plen = min_t(size_t, len, PAGE_SIZE - off);
1162
1163 partial[buffers].offset = off;
1164 partial[buffers].len = plen;
1165
1166 off = 0;
1167 len -= plen;
1168 buffers++;
1169 }
1170
1171 /*
1172 * We didn't complete this iov, stop here since it probably
1173 * means we have to move some of this into a pipe to
1174 * be able to continue.
1175 */
1176 if (len)
1177 break;
1178
1179 /*
1180 * Don't continue if we mapped fewer pages than we asked for,
1181 * or if we mapped the max number of pages that we have
1182 * room for.
1183 */
1184 if (error < npages || buffers == PIPE_BUFFERS)
1185 break;
1186
1187 nr_vecs--;
1188 iov++;
1189 }
1190
1191 up_read(&current->mm->mmap_sem);
1192
1193 if (buffers)
1194 return buffers;
1195
1196 return error;
1197}
1198
1199/*
1200 * vmsplice splices a user address range into a pipe. It can be thought of
1201 * as splice-from-memory, where the regular splice is splice-from-file (or
1202 * to file). In both cases the output is a pipe, naturally.
1203 *
1204 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1205 * not the other way around. Splicing from user memory is a simple operation
1206 * that can be supported without any funky alignment restrictions or nasty
1207 * vm tricks. We simply map in the user memory and fill them into a pipe.
1208 * The reverse isn't quite as easy, though. There are two possible solutions
1209 * for that:
1210 *
1211 * - memcpy() the data internally, at which point we might as well just
1212 * do a regular read() on the buffer anyway.
1213 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1214 * has restrictions on both ends of the pipe).
1215 *
1216 * Alas, it isn't here.
1217 *
1218 */
1219static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1220 unsigned long nr_segs, unsigned int flags)
1221{
1222 struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
1223 struct page *pages[PIPE_BUFFERS];
1224 struct partial_page partial[PIPE_BUFFERS];
1225 struct splice_pipe_desc spd = {
1226 .pages = pages,
1227 .partial = partial,
1228 .flags = flags,
1229 .ops = &user_page_pipe_buf_ops,
1230 };
1231
1232 if (unlikely(!pipe))
1233 return -EBADF;
1234 if (unlikely(nr_segs > UIO_MAXIOV))
1235 return -EINVAL;
1236 else if (unlikely(!nr_segs))
1237 return 0;
1238
1239 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1240 flags & SPLICE_F_GIFT);
1241 if (spd.nr_pages <= 0)
1242 return spd.nr_pages;
1243
1244 return splice_to_pipe(pipe, &spd);
1245}
1246
1247asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1248 unsigned long nr_segs, unsigned int flags)
1249{
1250 struct file *file;
1251 long error;
1252 int fput;
1253
1254 error = -EBADF;
1255 file = fget_light(fd, &fput);
1256 if (file) {
1257 if (file->f_mode & FMODE_WRITE)
1258 error = do_vmsplice(file, iov, nr_segs, flags);
1259
1260 fput_light(file, fput);
1261 }
1262
1263 return error;
1264}
1265
1002asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 1266asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
1003 int fd_out, loff_t __user *off_out, 1267 int fd_out, loff_t __user *off_out,
1004 size_t len, unsigned int flags) 1268 size_t len, unsigned int flags)
@@ -1081,6 +1345,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1081 obuf = opipe->bufs + nbuf; 1345 obuf = opipe->bufs + nbuf;
1082 *obuf = *ibuf; 1346 *obuf = *ibuf;
1083 1347
1348 /*
1349 * Don't inherit the gift flag, we need to
1350 * prevent multiple steals of this page.
1351 */
1352 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1353
1084 if (obuf->len > len) 1354 if (obuf->len > len)
1085 obuf->len = len; 1355 obuf->len = len;
1086 1356
diff --git a/fs/stat.c b/fs/stat.c
index 9948cc1685a4..0f282face322 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -261,7 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
261 return error; 261 return error;
262} 262}
263 263
264#ifndef __ARCH_WANT_STAT64 264#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
265asmlinkage long sys_newfstatat(int dfd, char __user *filename, 265asmlinkage long sys_newfstatat(int dfd, char __user *filename,
266 struct stat __user *statbuf, int flag) 266 struct stat __user *statbuf, int flag)
267{ 267{
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 64ee07db0d5e..8558226281c4 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1942,8 +1942,10 @@ xfs_alloc_fix_freelist(
1942 /* 1942 /*
1943 * Allocate as many blocks as possible at once. 1943 * Allocate as many blocks as possible at once.
1944 */ 1944 */
1945 if ((error = xfs_alloc_ag_vextent(&targs))) 1945 if ((error = xfs_alloc_ag_vextent(&targs))) {
1946 xfs_trans_brelse(tp, agflbp);
1946 return error; 1947 return error;
1948 }
1947 /* 1949 /*
1948 * Stop if we run out. Won't happen if callers are obeying 1950 * Stop if we run out. Won't happen if callers are obeying
1949 * the restrictions correctly. Can happen for free calls 1951 * the restrictions correctly. Can happen for free calls
@@ -1960,6 +1962,7 @@ xfs_alloc_fix_freelist(
1960 return error; 1962 return error;
1961 } 1963 }
1962 } 1964 }
1965 xfs_trans_brelse(tp, agflbp);
1963 args->agbp = agbp; 1966 args->agbp = agbp;
1964 return 0; 1967 return 0;
1965} 1968}
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 81a05cfd77d2..1f148762eb28 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -316,6 +316,18 @@ xfs_rename(
316 } 316 }
317 } 317 }
318 318
319 /*
320 * If we are using project inheritance, we only allow renames
321 * into our tree when the project IDs are the same; else the
322 * tree quota mechanism would be circumvented.
323 */
324 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
325 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
326 error = XFS_ERROR(EXDEV);
327 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
328 goto rele_return;
329 }
330
319 new_parent = (src_dp != target_dp); 331 new_parent = (src_dp != target_dp);
320 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); 332 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
321 333
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index f0e09ca14139..36ea1b2094f2 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -669,31 +669,22 @@ xfs_mntupdate(
669 xfs_mount_t *mp = XFS_BHVTOM(bdp); 669 xfs_mount_t *mp = XFS_BHVTOM(bdp);
670 int error; 670 int error;
671 671
672 if (args->flags & XFSMNT_BARRIER) 672 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
673 mp->m_flags |= XFS_MOUNT_BARRIER; 673 if (vfsp->vfs_flag & VFS_RDONLY)
674 else 674 vfsp->vfs_flag &= ~VFS_RDONLY;
675 mp->m_flags &= ~XFS_MOUNT_BARRIER; 675 if (args->flags & XFSMNT_BARRIER) {
676 676 mp->m_flags |= XFS_MOUNT_BARRIER;
677 if ((vfsp->vfs_flag & VFS_RDONLY) &&
678 !(*flags & MS_RDONLY)) {
679 vfsp->vfs_flag &= ~VFS_RDONLY;
680
681 if (args->flags & XFSMNT_BARRIER)
682 xfs_mountfs_check_barriers(mp); 677 xfs_mountfs_check_barriers(mp);
683 } 678 } else {
684 679 mp->m_flags &= ~XFS_MOUNT_BARRIER;
685 if (!(vfsp->vfs_flag & VFS_RDONLY) && 680 }
686 (*flags & MS_RDONLY)) { 681 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
687 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); 682 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
688
689 xfs_quiesce_fs(mp); 683 xfs_quiesce_fs(mp);
690
691 /* Ok now write out an unmount record */
692 xfs_log_unmount_write(mp); 684 xfs_log_unmount_write(mp);
693 xfs_unmountfs_writesb(mp); 685 xfs_unmountfs_writesb(mp);
694 vfsp->vfs_flag |= VFS_RDONLY; 686 vfsp->vfs_flag |= VFS_RDONLY;
695 } 687 }
696
697 return 0; 688 return 0;
698} 689}
699 690
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index fa71b305ba5c..7027ae68ee38 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2663,7 +2663,7 @@ xfs_link(
2663 */ 2663 */
2664 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2664 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2665 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2665 (tdp->i_d.di_projid != sip->i_d.di_projid))) {
2666 error = XFS_ERROR(EPERM); 2666 error = XFS_ERROR(EXDEV);
2667 goto error_return; 2667 goto error_return;
2668 } 2668 }
2669 2669