aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Jeff Garzik <jeff@garzik.org> 2006-04-12 16:54:16 -0400
committer Jeff Garzik <jeff@garzik.org> 2006-04-12 16:54:16 -0400
commit 875999c5539999f61a45620aae0c3e5fb1d2b035 (patch)
tree 4535032a8a10f5782c0aef6a620b1a624ea9f863 /fs
parent 79072f38909e3d9883317238887460c39ddcc4cb (diff)
parent 26ec634c31a11a003040e10b4d650495158632fd (diff)
Merge branch 'upstream'
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c13
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile2
-rw-r--r--fs/char_dev.c87
-rw-r--r--fs/cifs/CHANGES18
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README7
-rw-r--r--fs/cifs/cifsencrypt.c42
-rw-r--r--fs/cifs/cifsfs.c5
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h11
-rw-r--r--fs/cifs/cifspdu.h13
-rw-r--r--fs/cifs/cifsproto.h15
-rw-r--r--fs/cifs/cifssmb.c135
-rw-r--r--fs/cifs/connect.c99
-rw-r--r--fs/cifs/dir.c7
-rw-r--r--fs/cifs/file.c94
-rw-r--r--fs/cifs/inode.c22
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/misc.c46
-rw-r--r--fs/cifs/ntlmssp.c129
-rw-r--r--fs/cifs/ntlmssp.h2
-rw-r--r--fs/cifs/readdir.c7
-rw-r--r--fs/cifs/transport.c22
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/dcache.c50
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/dquot.c6
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c21
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/resize.c1
-rw-r--r--fs/fcntl.c3
-rw-r--r--fs/fifo.c65
-rw-r--r--fs/freevxfs/vxfs_olt.c9
-rw-r--r--fs/fuse/dev.c256
-rw-r--r--fs/fuse/dir.c118
-rw-r--r--fs/fuse/file.c56
-rw-r--r--fs/fuse/fuse_i.h61
-rw-r--r--fs/fuse/inode.c135
-rw-r--r--fs/hfsplus/bnode.c6
-rw-r--r--fs/hfsplus/btree.c3
-rw-r--r--fs/hppfs/hppfs_kern.c14
-rw-r--r--fs/inode.c15
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/jffs2/background.c3
-rw-r--r--fs/locks.c45
-rw-r--r--fs/msdos/namei.c15
-rw-r--r--fs/namei.c3
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfsd/auth.c46
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4acl.c8
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c150
-rw-r--r--fs/nfsd/nfs4xdr.c62
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/dlm/userdlm.c74
-rw-r--r--fs/ocfs2/file.c19
-rw-r--r--fs/pipe.c333
-rw-r--r--fs/proc/base.c13
-rw-r--r--fs/proc/proc_misc.c163
-rw-r--r--fs/proc/vmcore.c4
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/select.c38
-rw-r--r--fs/smbfs/file.c6
-rw-r--r--fs/splice.c963
-rw-r--r--fs/sync.c164
-rw-r--r--fs/sysfs/dir.c2
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/sysfs/inode.c3
-rw-r--r--fs/sysv/dir.c6
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/vfat/namei.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c113
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c120
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h12
-rw-r--r--fs/xfs/quota/xfs_qm.c17
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c68
-rw-r--r--fs/xfs/xfs_bmap.c11
-rw-r--r--fs/xfs/xfs_bmap.h9
-rw-r--r--fs/xfs/xfs_clnt.h1
-rw-r--r--fs/xfs/xfs_error.h3
-rw-r--r--fs/xfs/xfs_ialloc.c15
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c27
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_mount.c73
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_quota.h5
-rw-r--r--fs/xfs/xfs_vfsops.c10
-rw-r--r--fs/xfs/xfs_vnodeops.c4
104 files changed, 2970 insertions, 1438 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index b0a0ae509c00..61c599b4a1e3 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -127,12 +127,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
127 127
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 129 dprintk(DEBUG_ERROR, "problem initiating session\n");
130 kfree(v9ses); 130 sb = ERR_PTR(newfid);
131 return ERR_PTR(newfid); 131 goto out_free_session;
132 } 132 }
133 133
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 135 if (IS_ERR(sb))
136 goto out_close_session;
136 v9fs_fill_super(sb, v9ses, flags); 137 v9fs_fill_super(sb, v9ses, flags);
137 138
138 inode = v9fs_get_inode(sb, S_IFDIR | mode); 139 inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -185,6 +186,12 @@ static struct super_block *v9fs_get_sb(struct file_system_type
185 186
186 return sb; 187 return sb;
187 188
189out_close_session:
190 v9fs_session_close(v9ses);
191out_free_session:
192 kfree(v9ses);
193 return sb;
194
188put_back_sb: 195put_back_sb:
189 /* deactivate_super calls v9fs_kill_super which will free the rest */ 196 /* deactivate_super calls v9fs_kill_super which will free the rest */
190 up_write(&sb->s_umount); 197 up_write(&sb->s_umount);
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be68d4ca..2524629dc835 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -799,6 +799,7 @@ config PROC_KCORE
799config PROC_VMCORE 799config PROC_VMCORE
800 bool "/proc/vmcore support (EXPERIMENTAL)" 800 bool "/proc/vmcore support (EXPERIMENTAL)"
801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP 801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
802 default y
802 help 803 help
803 Exports the dump image of crashed kernel in ELF format. 804 Exports the dump image of crashed kernel in ELF format.
804 805
@@ -861,7 +862,7 @@ config RAMFS
861 862
862config CONFIGFS_FS 863config CONFIGFS_FS
863 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" 864 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
864 depends on EXPERIMENTAL 865 depends on SYSFS && EXPERIMENTAL
865 help 866 help
866 configfs is a ram-based filesystem that provides the converse 867 configfs is a ram-based filesystem that provides the converse
867 of sysfs's functionality. Where sysfs is a filesystem-based 868 of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/Makefile b/fs/Makefile
index 080b3867be4d..83bf478e786b 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13 ioprio.o pnode.o drop_caches.o 13 ioprio.o pnode.o drop_caches.o splice.o sync.o
14 14
15obj-$(CONFIG_INOTIFY) += inotify.o 15obj-$(CONFIG_INOTIFY) += inotify.o
16obj-$(CONFIG_EPOLL) += eventpoll.o 16obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 4e1b849f912f..f3418f7a6e9d 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
17#include <linux/devfs_fs_kernel.h> 17#include <linux/devfs_fs_kernel.h>
18#include <linux/seq_file.h>
18 19
19#include <linux/kobject.h> 20#include <linux/kobject.h>
20#include <linux/kobj_map.h> 21#include <linux/kobj_map.h>
@@ -27,8 +28,6 @@
27 28
28static struct kobj_map *cdev_map; 29static struct kobj_map *cdev_map;
29 30
30#define MAX_PROBE_HASH 255 /* random */
31
32static DEFINE_MUTEX(chrdevs_lock); 31static DEFINE_MUTEX(chrdevs_lock);
33 32
34static struct char_device_struct { 33static struct char_device_struct {
@@ -39,93 +38,29 @@ static struct char_device_struct {
39 char name[64]; 38 char name[64];
40 struct file_operations *fops; 39 struct file_operations *fops;
41 struct cdev *cdev; /* will die */ 40 struct cdev *cdev; /* will die */
42} *chrdevs[MAX_PROBE_HASH]; 41} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
43 42
44/* index in the above */ 43/* index in the above */
45static inline int major_to_index(int major) 44static inline int major_to_index(int major)
46{ 45{
47 return major % MAX_PROBE_HASH; 46 return major % CHRDEV_MAJOR_HASH_SIZE;
48}
49
50struct chrdev_info {
51 int index;
52 struct char_device_struct *cd;
53};
54
55void *get_next_chrdev(void *dev)
56{
57 struct chrdev_info *info;
58
59 if (dev == NULL) {
60 info = kmalloc(sizeof(*info), GFP_KERNEL);
61 if (!info)
62 goto out;
63 info->index=0;
64 info->cd = chrdevs[info->index];
65 if (info->cd)
66 goto out;
67 } else {
68 info = dev;
69 }
70
71 while (info->index < ARRAY_SIZE(chrdevs)) {
72 if (info->cd)
73 info->cd = info->cd->next;
74 if (info->cd)
75 goto out;
76 /*
77 * No devices on this chain, move to the next
78 */
79 info->index++;
80 info->cd = (info->index < ARRAY_SIZE(chrdevs)) ?
81 chrdevs[info->index] : NULL;
82 if (info->cd)
83 goto out;
84 }
85
86out:
87 return info;
88}
89
90void *acquire_chrdev_list(void)
91{
92 mutex_lock(&chrdevs_lock);
93 return get_next_chrdev(NULL);
94}
95
96void release_chrdev_list(void *dev)
97{
98 mutex_unlock(&chrdevs_lock);
99 kfree(dev);
100} 47}
101 48
49#ifdef CONFIG_PROC_FS
102 50
103int count_chrdev_list(void) 51void chrdev_show(struct seq_file *f, off_t offset)
104{ 52{
105 struct char_device_struct *cd; 53 struct char_device_struct *cd;
106 int i, count;
107
108 count = 0;
109 54
110 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { 55 if (offset < CHRDEV_MAJOR_HASH_SIZE) {
111 for (cd = chrdevs[i]; cd; cd = cd->next) 56 mutex_lock(&chrdevs_lock);
112 count++; 57 for (cd = chrdevs[offset]; cd; cd = cd->next)
58 seq_printf(f, "%3d %s\n", cd->major, cd->name);
59 mutex_unlock(&chrdevs_lock);
113 } 60 }
114
115 return count;
116} 61}
117 62
118int get_chrdev_info(void *dev, int *major, char **name) 63#endif /* CONFIG_PROC_FS */
119{
120 struct chrdev_info *info = dev;
121
122 if (info->cd == NULL)
123 return 1;
124
125 *major = info->cd->major;
126 *name = info->cd->name;
127 return 0;
128}
129 64
130/* 65/*
131 * Register a single major with a specified minor range. 66 * Register a single major with a specified minor range.
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index cb68efba35db..8a2de038882e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,21 @@
1Version 1.42
2------------
3Fix slow oplock break when mounted to different servers at the same time and
4the tids match and we try to find matching fid on wrong server.
5
6Version 1.41
7------------
8Fix NTLMv2 security (can be enabled in /proc/fs/cifs) so customers can
9configure stronger authentication. Fix sfu symlinks so they can
10be followed (not just recognized). Fix wraparound of bcc on
11read responses when buffer size over 64K and also fix wrap of
12max smb buffer size when CIFSMaxBufSize over 64K. Fix oops in
13cifs_user_read and cifs_readpages (when EAGAIN on send of smb
14on socket is returned over and over). Add POSIX (advisory) byte range
15locking support (requires server with newest CIFS UNIX Extensions
16to the protocol implemented). Slow down negprot slightly in port 139
17RFC1001 case to give session_init time on buggy servers.
18
1Version 1.40 19Version 1.40
2------------ 20------------
3Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance 21Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 7384947a0f93..58c77254a23b 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
3# 3#
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o 6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o ntlmssp.o
diff --git a/fs/cifs/README b/fs/cifs/README
index b0070d1b149d..b2b4d0803761 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -422,6 +422,13 @@ A partial list of the supported mount options follows:
422 nomapchars Do not translate any of these seven characters (default). 422 nomapchars Do not translate any of these seven characters (default).
423 nocase Request case insensitive path name matching (case 423 nocase Request case insensitive path name matching (case
424 sensitive is the default if the server supports it). 424 sensitive is the default if the server supports it).
425 posixpaths If CIFS Unix extensions are supported, attempt to
426 negotiate posix path name support which allows certain
427 characters forbidden in typical CIFS filenames, without
428 requiring remapping. (default)
429 noposixpaths If CIFS Unix extensions are supported, do not request
430 posix path name support (this may cause servers to
431 reject creating files with certain reserved characters).
425 nobrl Do not send byte range lock requests to the server. 432 nobrl Do not send byte range lock requests to the server.
426 This is necessary for certain applications that break 433 This is necessary for certain applications that break
427 with cifs style mandatory byte range locks (and most 434 with cifs style mandatory byte range locks (and most
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a2c24858d40f..e7d63737e651 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsencrypt.c 2 * fs/cifs/cifsencrypt.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2005 4 * Copyright (C) International Business Machines Corp., 2005,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -36,7 +36,8 @@
36extern void mdfour(unsigned char *out, unsigned char *in, int n); 36extern void mdfour(unsigned char *out, unsigned char *in, int n);
37extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); 37extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
38 38
39static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, const char * key, char * signature) 39static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu,
40 const char * key, char * signature)
40{ 41{
41 struct MD5Context context; 42 struct MD5Context context;
42 43
@@ -56,9 +57,6 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct TCP_Server_Info * server,
56 int rc = 0; 57 int rc = 0;
57 char smb_signature[20]; 58 char smb_signature[20];
58 59
59 /* BB remember to initialize sequence number elsewhere and initialize mac_signing key elsewhere BB */
60 /* BB remember to add code to save expected sequence number in midQ entry BB */
61
62 if((cifs_pdu == NULL) || (server == NULL)) 60 if((cifs_pdu == NULL) || (server == NULL))
63 return -EINVAL; 61 return -EINVAL;
64 62
@@ -85,20 +83,33 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct TCP_Server_Info * server,
85static int cifs_calc_signature2(const struct kvec * iov, int n_vec, 83static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
86 const char * key, char * signature) 84 const char * key, char * signature)
87{ 85{
88 struct MD5Context context; 86 struct MD5Context context;
89 87 int i;
90 if((iov == NULL) || (signature == NULL))
91 return -EINVAL;
92 88
93 MD5Init(&context); 89 if((iov == NULL) || (signature == NULL))
94 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); 90 return -EINVAL;
95 91
96/* MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); */ /* BB FIXME BB */ 92 MD5Init(&context);
93 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
94 for(i=0;i<n_vec;i++) {
95 if(iov[i].iov_base == NULL) {
96 cERROR(1,("null iovec entry"));
97 return -EIO;
98 } else if(iov[i].iov_len == 0)
99 break; /* bail out if we are sent nothing to sign */
100 /* The first entry includes a length field (which does not get
101 signed) that occupies the first 4 bytes before the header */
102 if(i==0) {
103 if (iov[0].iov_len <= 8 ) /* cmd field at offset 9 */
104 break; /* nothing to sign or corrupt header */
105 MD5Update(&context,iov[0].iov_base+4, iov[0].iov_len-4);
106 } else
107 MD5Update(&context,iov[i].iov_base, iov[i].iov_len);
108 }
97 109
98 MD5Final(signature,&context); 110 MD5Final(signature,&context);
99 111
100 return -EOPNOTSUPP; 112 return 0;
101/* return 0; */
102} 113}
103 114
104 115
@@ -259,4 +270,5 @@ void CalcNTLMv2_response(const struct cifsSesInfo * ses,char * v2_session_respon
259/* hmac_md5_update(v2_session_response+16)client thing,8,&context); */ /* BB fix */ 270/* hmac_md5_update(v2_session_response+16)client thing,8,&context); */ /* BB fix */
260 271
261 hmac_md5_final(v2_session_response,&context); 272 hmac_md5_final(v2_session_response,&context);
273 cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); /* BB removeme BB */
262} 274}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4bbc544857bc..d4b713e5affb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -93,13 +93,10 @@ cifs_read_super(struct super_block *sb, void *data,
93 int rc = 0; 93 int rc = 0;
94 94
95 sb->s_flags |= MS_NODIRATIME; /* and probably even noatime */ 95 sb->s_flags |= MS_NODIRATIME; /* and probably even noatime */
96 sb->s_fs_info = kmalloc(sizeof(struct cifs_sb_info),GFP_KERNEL); 96 sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info),GFP_KERNEL);
97 cifs_sb = CIFS_SB(sb); 97 cifs_sb = CIFS_SB(sb);
98 if(cifs_sb == NULL) 98 if(cifs_sb == NULL)
99 return -ENOMEM; 99 return -ENOMEM;
100 else
101 memset(cifs_sb,0,sizeof(struct cifs_sb_info));
102
103 100
104 rc = cifs_mount(sb, cifs_sb, data, devname); 101 rc = cifs_mount(sb, cifs_sb, data, devname);
105 102
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 74f405ae4da3..4e829dc672a6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -99,5 +99,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
100extern int cifs_ioctl (struct inode * inode, struct file * filep, 100extern int cifs_ioctl (struct inode * inode, struct file * filep,
101 unsigned int command, unsigned long arg); 101 unsigned int command, unsigned long arg);
102#define CIFS_VERSION "1.40" 102#define CIFS_VERSION "1.42"
103#endif /* _CIFSFS_H */ 103#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7bed27601ce5..006eb33bff5f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsglob.h 2 * fs/cifs/cifsglob.h
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2005 4 * Copyright (C) International Business Machines Corp., 2002,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -430,6 +430,15 @@ struct dir_notify_req {
430#define CIFS_LARGE_BUFFER 2 430#define CIFS_LARGE_BUFFER 2
431#define CIFS_IOVEC 4 /* array of response buffers */ 431#define CIFS_IOVEC 4 /* array of response buffers */
432 432
433/* Type of session setup needed */
434#define CIFS_PLAINTEXT 0
435#define CIFS_LANMAN 1
436#define CIFS_NTLM 2
437#define CIFS_NTLMSSP_NEG 3
438#define CIFS_NTLMSSP_AUTH 4
439#define CIFS_SPNEGO_INIT 5
440#define CIFS_SPNEGO_TARG 6
441
433/* 442/*
434 ***************************************************************** 443 *****************************************************************
435 * All constants go here 444 * All constants go here
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index cc2471094ca5..b2233ac05bd2 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -859,7 +859,10 @@ typedef struct smb_com_lock_req {
859 LOCKING_ANDX_RANGE Locks[1]; 859 LOCKING_ANDX_RANGE Locks[1];
860} __attribute__((packed)) LOCK_REQ; 860} __attribute__((packed)) LOCK_REQ;
861 861
862 862/* lock type */
863#define CIFS_RDLCK 0
864#define CIFS_WRLCK 1
865#define CIFS_UNLCK 2
863typedef struct cifs_posix_lock { 866typedef struct cifs_posix_lock {
864 __le16 lock_type; /* 0 = Read, 1 = Write, 2 = Unlock */ 867 __le16 lock_type; /* 0 = Read, 1 = Write, 2 = Unlock */
865 __le16 lock_flags; /* 1 = Wait (only valid for setlock) */ 868 __le16 lock_flags; /* 1 = Wait (only valid for setlock) */
@@ -1786,7 +1789,13 @@ typedef struct {
1786#define CIFS_UNIX_POSIX_ACL_CAP 0x00000002 /* support getfacl/setfacl */ 1789#define CIFS_UNIX_POSIX_ACL_CAP 0x00000002 /* support getfacl/setfacl */
1787#define CIFS_UNIX_XATTR_CAP 0x00000004 /* support new namespace */ 1790#define CIFS_UNIX_XATTR_CAP 0x00000004 /* support new namespace */
1788#define CIFS_UNIX_EXTATTR_CAP 0x00000008 /* support chattr/chflag */ 1791#define CIFS_UNIX_EXTATTR_CAP 0x00000008 /* support chattr/chflag */
1789#define CIFS_UNIX_POSIX_PATHNAMES_CAP 0x00000010 /* Use POSIX pathnames on the wire. */ 1792#define CIFS_UNIX_POSIX_PATHNAMES_CAP 0x00000010 /* Allow POSIX path chars */
1793#ifdef CONFIG_CIFS_POSIX
1794#define CIFS_UNIX_CAP_MASK 0x0000001b
1795#else
1796#define CIFS_UNIX_CAP_MASK 0x00000013
1797#endif /* CONFIG_CIFS_POSIX */
1798
1790 1799
1791#define CIFS_POSIX_EXTENSIONS 0x00000010 /* support for new QFSInfo */ 1800#define CIFS_POSIX_EXTENSIONS 0x00000010 /* support for new QFSInfo */
1792 1801
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 7b25463d3c14..2879ba343ca7 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsproto.h 2 * fs/cifs/cifsproto.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002,2005 4 * Copyright (c) International Business Machines Corp., 2002,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -64,6 +64,14 @@ extern int map_smb_to_linux_error(struct smb_hdr *smb);
64extern void header_assemble(struct smb_hdr *, char /* command */ , 64extern void header_assemble(struct smb_hdr *, char /* command */ ,
65 const struct cifsTconInfo *, int /* length of 65 const struct cifsTconInfo *, int /* length of
66 fixed section (word count) in two byte units */); 66 fixed section (word count) in two byte units */);
67#ifdef CONFIG_CIFS_EXPERIMENTAL
68extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
69 struct cifsSesInfo *ses,
70 void ** request_buf);
71extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
72 const int stage, int * pNTLMv2_flg,
73 const struct nls_table *nls_cp);
74#endif
67extern __u16 GetNextMid(struct TCP_Server_Info *server); 75extern __u16 GetNextMid(struct TCP_Server_Info *server);
68extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, 76extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16,
69 struct cifsTconInfo *); 77 struct cifsTconInfo *);
@@ -257,7 +265,10 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
257 const __u64 offset, const __u32 numUnlock, 265 const __u64 offset, const __u32 numUnlock,
258 const __u32 numLock, const __u8 lockType, 266 const __u32 numLock, const __u8 lockType,
259 const int waitFlag); 267 const int waitFlag);
260 268extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
269 const __u16 smb_file_id, const int get_flag,
270 const __u64 len, const __u64 offset,
271 const __u16 lock_type, const int waitFlag);
261extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); 272extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon);
262extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); 273extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses);
263 274
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a243fe2792d5..d705500aa283 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2005 4 * Copyright (C) International Business Machines Corp., 2002,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -186,7 +186,35 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
186 cifs_stats_inc(&tcon->num_smbs_sent); 186 cifs_stats_inc(&tcon->num_smbs_sent);
187 187
188 return rc; 188 return rc;
189} 189}
190
191#ifdef CONFIG_CIFS_EXPERIMENTAL
192int
193small_smb_init_no_tc(const int smb_command, const int wct,
194 struct cifsSesInfo *ses, void **request_buf)
195{
196 int rc;
197 struct smb_hdr * buffer;
198
199 rc = small_smb_init(smb_command, wct, NULL, request_buf);
200 if(rc)
201 return rc;
202
203 buffer = (struct smb_hdr *)*request_buf;
204 buffer->Mid = GetNextMid(ses->server);
205 if (ses->capabilities & CAP_UNICODE)
206 buffer->Flags2 |= SMBFLG2_UNICODE;
207 if (ses->capabilities & CAP_STATUS32)
208 buffer->Flags2 |= SMBFLG2_ERR_STATUS;
209
210 /* uid, tid can stay at zero as set in header assemble */
211
212 /* BB add support for turning on the signing when
213 this function is used after 1st of session setup requests */
214
215 return rc;
216}
217#endif /* CONFIG_CIFS_EXPERIMENTAL */
190 218
191/* If the return code is zero, this function must fill in request_buf pointer */ 219/* If the return code is zero, this function must fill in request_buf pointer */
192static int 220static int
@@ -1042,7 +1070,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
1042 } 1070 }
1043 } 1071 }
1044 1072
1045 cifs_small_buf_release(pSMB); 1073/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
1046 if(*buf) { 1074 if(*buf) {
1047 if(resp_buf_type == CIFS_SMALL_BUFFER) 1075 if(resp_buf_type == CIFS_SMALL_BUFFER)
1048 cifs_small_buf_release(iov[0].iov_base); 1076 cifs_small_buf_release(iov[0].iov_base);
@@ -1246,7 +1274,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1246 *nbytes += le16_to_cpu(pSMBr->Count); 1274 *nbytes += le16_to_cpu(pSMBr->Count);
1247 } 1275 }
1248 1276
1249 cifs_small_buf_release(pSMB); 1277/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
1250 if(resp_buf_type == CIFS_SMALL_BUFFER) 1278 if(resp_buf_type == CIFS_SMALL_BUFFER)
1251 cifs_small_buf_release(iov[0].iov_base); 1279 cifs_small_buf_release(iov[0].iov_base);
1252 else if(resp_buf_type == CIFS_LARGE_BUFFER) 1280 else if(resp_buf_type == CIFS_LARGE_BUFFER)
@@ -1325,6 +1353,85 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1325} 1353}
1326 1354
1327int 1355int
1356CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1357 const __u16 smb_file_id, const int get_flag, const __u64 len,
1358 const __u64 lkoffset, const __u16 lock_type, const int waitFlag)
1359{
1360 struct smb_com_transaction2_sfi_req *pSMB = NULL;
1361 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL;
1362 char *data_offset;
1363 struct cifs_posix_lock *parm_data;
1364 int rc = 0;
1365 int bytes_returned = 0;
1366 __u16 params, param_offset, offset, byte_count, count;
1367
1368 cFYI(1, ("Posix Lock"));
1369 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
1370
1371 if (rc)
1372 return rc;
1373
1374 pSMBr = (struct smb_com_transaction2_sfi_rsp *)pSMB;
1375
1376 params = 6;
1377 pSMB->MaxSetupCount = 0;
1378 pSMB->Reserved = 0;
1379 pSMB->Flags = 0;
1380 pSMB->Timeout = 0;
1381 pSMB->Reserved2 = 0;
1382 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
1383 offset = param_offset + params;
1384
1385 data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
1386
1387 count = sizeof(struct cifs_posix_lock);
1388 pSMB->MaxParameterCount = cpu_to_le16(2);
1389 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */
1390 pSMB->SetupCount = 1;
1391 pSMB->Reserved3 = 0;
1392 if(get_flag)
1393 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FILE_INFORMATION);
1394 else
1395 pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
1396 byte_count = 3 /* pad */ + params + count;
1397 pSMB->DataCount = cpu_to_le16(count);
1398 pSMB->ParameterCount = cpu_to_le16(params);
1399 pSMB->TotalDataCount = pSMB->DataCount;
1400 pSMB->TotalParameterCount = pSMB->ParameterCount;
1401 pSMB->ParameterOffset = cpu_to_le16(param_offset);
1402 parm_data = (struct cifs_posix_lock *)
1403 (((char *) &pSMB->hdr.Protocol) + offset);
1404
1405 parm_data->lock_type = cpu_to_le16(lock_type);
1406 if(waitFlag)
1407 parm_data->lock_flags = 1;
1408 parm_data->pid = cpu_to_le32(current->tgid);
1409 parm_data->start = lkoffset;
1410 parm_data->length = len; /* normalize negative numbers */
1411
1412 pSMB->DataOffset = cpu_to_le16(offset);
1413 pSMB->Fid = smb_file_id;
1414 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
1415 pSMB->Reserved4 = 0;
1416 pSMB->hdr.smb_buf_length += byte_count;
1417 pSMB->ByteCount = cpu_to_le16(byte_count);
1418 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1419 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1420 if (rc) {
1421 cFYI(1, ("Send error in Posix Lock = %d", rc));
1422 }
1423
1424 if (pSMB)
1425 cifs_small_buf_release(pSMB);
1426
1427 /* Note: On -EAGAIN error only caller can retry on handle based calls
1428 since file handle passed in no longer valid */
1429
1430 return rc;
1431}
1432
1433
1434int
1328CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id) 1435CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1329{ 1436{
1330 int rc = 0; 1437 int rc = 0;
@@ -2578,7 +2685,7 @@ qsec_out:
2578 cifs_small_buf_release(iov[0].iov_base); 2685 cifs_small_buf_release(iov[0].iov_base);
2579 else if(buf_type == CIFS_LARGE_BUFFER) 2686 else if(buf_type == CIFS_LARGE_BUFFER)
2580 cifs_buf_release(iov[0].iov_base); 2687 cifs_buf_release(iov[0].iov_base);
2581 cifs_small_buf_release(pSMB); 2688/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
2582 return rc; 2689 return rc;
2583} 2690}
2584 2691
@@ -2954,7 +3061,8 @@ findFirstRetry:
2954 pSMB->TotalParameterCount = cpu_to_le16(params); 3061 pSMB->TotalParameterCount = cpu_to_le16(params);
2955 pSMB->ParameterCount = pSMB->TotalParameterCount; 3062 pSMB->ParameterCount = pSMB->TotalParameterCount;
2956 pSMB->ParameterOffset = cpu_to_le16( 3063 pSMB->ParameterOffset = cpu_to_le16(
2957 offsetof(struct smb_com_transaction2_ffirst_req, SearchAttributes) - 4); 3064 offsetof(struct smb_com_transaction2_ffirst_req, SearchAttributes)
3065 - 4);
2958 pSMB->DataCount = 0; 3066 pSMB->DataCount = 0;
2959 pSMB->DataOffset = 0; 3067 pSMB->DataOffset = 0;
2960 pSMB->SetupCount = 1; /* one byte, no need to make endian neutral */ 3068 pSMB->SetupCount = 1; /* one byte, no need to make endian neutral */
@@ -2977,12 +3085,12 @@ findFirstRetry:
2977 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3085 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2978 cifs_stats_inc(&tcon->num_ffirst); 3086 cifs_stats_inc(&tcon->num_ffirst);
2979 3087
2980 if (rc) {/* BB add logic to retry regular search if Unix search rejected unexpectedly by server */ 3088 if (rc) {/* BB add logic to retry regular search if Unix search
3089 rejected unexpectedly by server */
2981 /* BB Add code to handle unsupported level rc */ 3090 /* BB Add code to handle unsupported level rc */
2982 cFYI(1, ("Error in FindFirst = %d", rc)); 3091 cFYI(1, ("Error in FindFirst = %d", rc));
2983 3092
2984 if (pSMB) 3093 cifs_buf_release(pSMB);
2985 cifs_buf_release(pSMB);
2986 3094
2987 /* BB eventually could optimize out free and realloc of buf */ 3095 /* BB eventually could optimize out free and realloc of buf */
2988 /* for this case */ 3096 /* for this case */
@@ -2998,6 +3106,7 @@ findFirstRetry:
2998 psrch_inf->unicode = FALSE; 3106 psrch_inf->unicode = FALSE;
2999 3107
3000 psrch_inf->ntwrk_buf_start = (char *)pSMBr; 3108 psrch_inf->ntwrk_buf_start = (char *)pSMBr;
3109 psrch_inf->smallBuf = 0;
3001 psrch_inf->srch_entries_start = 3110 psrch_inf->srch_entries_start =
3002 (char *) &pSMBr->hdr.Protocol + 3111 (char *) &pSMBr->hdr.Protocol +
3003 le16_to_cpu(pSMBr->t2.DataOffset); 3112 le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3118,9 +3227,14 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3118 parms = (T2_FNEXT_RSP_PARMS *)response_data; 3227 parms = (T2_FNEXT_RSP_PARMS *)response_data;
3119 response_data = (char *)&pSMBr->hdr.Protocol + 3228 response_data = (char *)&pSMBr->hdr.Protocol +
3120 le16_to_cpu(pSMBr->t2.DataOffset); 3229 le16_to_cpu(pSMBr->t2.DataOffset);
3121 cifs_buf_release(psrch_inf->ntwrk_buf_start); 3230 if(psrch_inf->smallBuf)
3231 cifs_small_buf_release(
3232 psrch_inf->ntwrk_buf_start);
3233 else
3234 cifs_buf_release(psrch_inf->ntwrk_buf_start);
3122 psrch_inf->srch_entries_start = response_data; 3235 psrch_inf->srch_entries_start = response_data;
3123 psrch_inf->ntwrk_buf_start = (char *)pSMB; 3236 psrch_inf->ntwrk_buf_start = (char *)pSMB;
3237 psrch_inf->smallBuf = 0;
3124 if(parms->EndofSearch) 3238 if(parms->EndofSearch)
3125 psrch_inf->endOfSearch = TRUE; 3239 psrch_inf->endOfSearch = TRUE;
3126 else 3240 else
@@ -3834,6 +3948,7 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
3834 3948
3835 cFYI(1, ("In SETFSUnixInfo")); 3949 cFYI(1, ("In SETFSUnixInfo"));
3836SETFSUnixRetry: 3950SETFSUnixRetry:
3951 /* BB switch to small buf init to save memory */
3837 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3952 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
3838 (void **) &pSMBr); 3953 (void **) &pSMBr);
3839 if (rc) 3954 if (rc)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2a0c1f4ca0ae..0b86d5ca9014 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/connect.c 2 * fs/cifs/connect.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2005 4 * Copyright (C) International Business Machines Corp., 2002,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -564,7 +564,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
564 564
565 565
566 dump_smb(smb_buffer, length); 566 dump_smb(smb_buffer, length);
567 if (checkSMB (smb_buffer, smb_buffer->Mid, total_read+4)) { 567 if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) {
568 cifs_dump_mem("Bad SMB: ", smb_buffer, 48); 568 cifs_dump_mem("Bad SMB: ", smb_buffer, 48);
569 continue; 569 continue;
570 } 570 }
@@ -1476,6 +1476,14 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1476 rc = smb_send(*csocket, smb_buf, 0x44, 1476 rc = smb_send(*csocket, smb_buf, 0x44,
1477 (struct sockaddr *)psin_server); 1477 (struct sockaddr *)psin_server);
1478 kfree(ses_init_buf); 1478 kfree(ses_init_buf);
1479 msleep(1); /* RFC1001 layer in at least one server
1480 requires very short break before negprot
1481 presumably because not expecting negprot
1482 to follow so fast. This is a simple
1483 solution that works without
1484 complicating the code and causes no
1485 significant slowing down on mount
1486 for everyone else */
1479 } 1487 }
1480 /* else the negprot may still work without this 1488 /* else the negprot may still work without this
1481 even though malloc failed */ 1489 even though malloc failed */
@@ -1920,27 +1928,34 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1920 cifs_sb->tcon = tcon; 1928 cifs_sb->tcon = tcon;
1921 tcon->ses = pSesInfo; 1929 tcon->ses = pSesInfo;
1922 1930
1923 /* do not care if following two calls succeed - informational only */ 1931 /* do not care if following two calls succeed - informational */
1924 CIFSSMBQFSDeviceInfo(xid, tcon); 1932 CIFSSMBQFSDeviceInfo(xid, tcon);
1925 CIFSSMBQFSAttributeInfo(xid, tcon); 1933 CIFSSMBQFSAttributeInfo(xid, tcon);
1934
1926 if (tcon->ses->capabilities & CAP_UNIX) { 1935 if (tcon->ses->capabilities & CAP_UNIX) {
1927 if(!CIFSSMBQFSUnixInfo(xid, tcon)) { 1936 if(!CIFSSMBQFSUnixInfo(xid, tcon)) {
1928 if(!volume_info.no_psx_acl) { 1937 __u64 cap =
1929 if(CIFS_UNIX_POSIX_ACL_CAP & 1938 le64_to_cpu(tcon->fsUnixInfo.Capability);
1930 le64_to_cpu(tcon->fsUnixInfo.Capability)) 1939 cap &= CIFS_UNIX_CAP_MASK;
1931 cFYI(1,("server negotiated posix acl support")); 1940 if(volume_info.no_psx_acl)
1932 sb->s_flags |= MS_POSIXACL; 1941 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
1942 else if(CIFS_UNIX_POSIX_ACL_CAP & cap) {
1943 cFYI(1,("negotiated posix acl support"));
1944 sb->s_flags |= MS_POSIXACL;
1933 } 1945 }
1934 1946
1935 /* Try and negotiate POSIX pathnames if we can. */ 1947 if(volume_info.posix_paths == 0)
1936 if (volume_info.posix_paths && (CIFS_UNIX_POSIX_PATHNAMES_CAP & 1948 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
1937 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 1949 else if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
1938 if (!CIFSSMBSetFSUnixInfo(xid, tcon, CIFS_UNIX_POSIX_PATHNAMES_CAP)) { 1950 cFYI(1,("negotiate posix pathnames"));
1939 cFYI(1,("negotiated posix pathnames support")); 1951 cifs_sb->mnt_cifs_flags |=
1940 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; 1952 CIFS_MOUNT_POSIX_PATHS;
1941 } else { 1953 }
1942 cFYI(1,("posix pathnames support requested but not supported")); 1954
1943 } 1955 cFYI(1,("Negotiate caps 0x%x",(int)cap));
1956
1957 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
1958 cFYI(1,("setting capabilities failed"));
1944 } 1959 }
1945 } 1960 }
1946 } 1961 }
@@ -2278,6 +2293,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2278 smb_buffer->Mid = GetNextMid(ses->server); 2293 smb_buffer->Mid = GetNextMid(ses->server);
2279 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 2294 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
2280 pSMB->req.AndXCommand = 0xFF; 2295 pSMB->req.AndXCommand = 0xFF;
2296 if(ses->server->maxBuf > 64*1024)
2297 ses->server->maxBuf = (64*1023);
2281 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); 2298 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2282 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); 2299 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2283 2300
@@ -2525,7 +2542,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2525 __u32 negotiate_flags, capabilities; 2542 __u32 negotiate_flags, capabilities;
2526 __u16 count; 2543 __u16 count;
2527 2544
2528 cFYI(1, ("In NTLMSSP sesssetup (negotiate) ")); 2545 cFYI(1, ("In NTLMSSP sesssetup (negotiate)"));
2529 if(ses == NULL) 2546 if(ses == NULL)
2530 return -EINVAL; 2547 return -EINVAL;
2531 domain = ses->domainName; 2548 domain = ses->domainName;
@@ -2575,7 +2592,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2575 SecurityBlob->MessageType = NtLmNegotiate; 2592 SecurityBlob->MessageType = NtLmNegotiate;
2576 negotiate_flags = 2593 negotiate_flags =
2577 NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_OEM | 2594 NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_OEM |
2578 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM | 0x80000000 | 2595 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM |
2596 NTLMSSP_NEGOTIATE_56 |
2579 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128; 2597 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128;
2580 if(sign_CIFS_PDUs) 2598 if(sign_CIFS_PDUs)
2581 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN; 2599 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN;
@@ -2588,26 +2606,11 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2588 SecurityBlob->WorkstationName.Length = 0; 2606 SecurityBlob->WorkstationName.Length = 0;
2589 SecurityBlob->WorkstationName.MaximumLength = 0; 2607 SecurityBlob->WorkstationName.MaximumLength = 0;
2590 2608
2591 if (domain == NULL) { 2609 /* Domain not sent on first Sesssetup in NTLMSSP, instead it is sent
2592 SecurityBlob->DomainName.Buffer = 0; 2610 along with username on auth request (ie the response to challenge) */
2593 SecurityBlob->DomainName.Length = 0; 2611 SecurityBlob->DomainName.Buffer = 0;
2594 SecurityBlob->DomainName.MaximumLength = 0; 2612 SecurityBlob->DomainName.Length = 0;
2595 } else { 2613 SecurityBlob->DomainName.MaximumLength = 0;
2596 __u16 len;
2597 negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
2598 strncpy(bcc_ptr, domain, 63);
2599 len = strnlen(domain, 64);
2600 SecurityBlob->DomainName.MaximumLength =
2601 cpu_to_le16(len);
2602 SecurityBlob->DomainName.Buffer =
2603 cpu_to_le32((long) &SecurityBlob->
2604 DomainString -
2605 (long) &SecurityBlob->Signature);
2606 bcc_ptr += len;
2607 SecurityBlobLength += len;
2608 SecurityBlob->DomainName.Length =
2609 cpu_to_le16(len);
2610 }
2611 if (ses->capabilities & CAP_UNICODE) { 2614 if (ses->capabilities & CAP_UNICODE) {
2612 if ((long) bcc_ptr % 2) { 2615 if ((long) bcc_ptr % 2) {
2613 *bcc_ptr = 0; 2616 *bcc_ptr = 0;
@@ -2677,7 +2680,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2677 SecurityBlob2->MessageType)); 2680 SecurityBlob2->MessageType));
2678 } else if (ses) { 2681 } else if (ses) {
2679 ses->Suid = smb_buffer_response->Uid; /* UID left in le format */ 2682 ses->Suid = smb_buffer_response->Uid; /* UID left in le format */
2680 cFYI(1, ("UID = %d ", ses->Suid)); 2683 cFYI(1, ("UID = %d", ses->Suid));
2681 if ((pSMBr->resp.hdr.WordCount == 3) 2684 if ((pSMBr->resp.hdr.WordCount == 3)
2682 || ((pSMBr->resp.hdr.WordCount == 4) 2685 || ((pSMBr->resp.hdr.WordCount == 4)
2683 && (blob_len < 2686 && (blob_len <
@@ -2685,17 +2688,17 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2685 2688
2686 if (pSMBr->resp.hdr.WordCount == 4) { 2689 if (pSMBr->resp.hdr.WordCount == 4) {
2687 bcc_ptr += blob_len; 2690 bcc_ptr += blob_len;
2688 cFYI(1, 2691 cFYI(1, ("Security Blob Length %d",
2689 ("Security Blob Length %d ",
2690 blob_len)); 2692 blob_len));
2691 } 2693 }
2692 2694
2693 cFYI(1, ("NTLMSSP Challenge rcvd ")); 2695 cFYI(1, ("NTLMSSP Challenge rcvd"));
2694 2696
2695 memcpy(ses->server->cryptKey, 2697 memcpy(ses->server->cryptKey,
2696 SecurityBlob2->Challenge, 2698 SecurityBlob2->Challenge,
2697 CIFS_CRYPTO_KEY_SIZE); 2699 CIFS_CRYPTO_KEY_SIZE);
2698 if(SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) 2700 if(SecurityBlob2->NegotiateFlags &
2701 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2))
2699 *pNTLMv2_flag = TRUE; 2702 *pNTLMv2_flag = TRUE;
2700 2703
2701 if((SecurityBlob2->NegotiateFlags & 2704 if((SecurityBlob2->NegotiateFlags &
@@ -2818,7 +2821,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2818 bcc_ptr++; 2821 bcc_ptr++;
2819 } else 2822 } else
2820 cFYI(1, 2823 cFYI(1,
2821 ("Variable field of length %d extends beyond end of smb ", 2824 ("Variable field of length %d extends beyond end of smb",
2822 len)); 2825 len));
2823 } 2826 }
2824 } else { 2827 } else {
@@ -2830,7 +2833,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2830 } 2833 }
2831 } else { 2834 } else {
2832 cERROR(1, 2835 cERROR(1,
2833 (" Invalid Word count %d: ", 2836 (" Invalid Word count %d:",
2834 smb_buffer_response->WordCount)); 2837 smb_buffer_response->WordCount));
2835 rc = -EIO; 2838 rc = -EIO;
2836 } 2839 }
@@ -3447,7 +3450,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3447 if (extended_security 3450 if (extended_security
3448 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3451 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3449 && (pSesInfo->server->secType == NTLMSSP)) { 3452 && (pSesInfo->server->secType == NTLMSSP)) {
3450 cFYI(1, ("New style sesssetup ")); 3453 cFYI(1, ("New style sesssetup"));
3451 rc = CIFSSpnegoSessSetup(xid, pSesInfo, 3454 rc = CIFSSpnegoSessSetup(xid, pSesInfo,
3452 NULL /* security blob */, 3455 NULL /* security blob */,
3453 0 /* blob length */, 3456 0 /* blob length */,
@@ -3455,7 +3458,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3455 } else if (extended_security 3458 } else if (extended_security
3456 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3459 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3457 && (pSesInfo->server->secType == RawNTLMSSP)) { 3460 && (pSesInfo->server->secType == RawNTLMSSP)) {
3458 cFYI(1, ("NTLMSSP sesssetup ")); 3461 cFYI(1, ("NTLMSSP sesssetup"));
3459 rc = CIFSNTLMSSPNegotiateSessSetup(xid, 3462 rc = CIFSNTLMSSPNegotiateSessSetup(xid,
3460 pSesInfo, 3463 pSesInfo,
3461 &ntlmv2_flag, 3464 &ntlmv2_flag,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 632561dd9c50..1d0ca3eaaca5 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -48,13 +48,14 @@ build_path_from_dentry(struct dentry *direntry)
48 struct dentry *temp; 48 struct dentry *temp;
49 int namelen = 0; 49 int namelen = 0;
50 char *full_path; 50 char *full_path;
51 char dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); 51 char dirsep;
52 52
53 if(direntry == NULL) 53 if(direntry == NULL)
54 return NULL; /* not much we can do if dentry is freed and 54 return NULL; /* not much we can do if dentry is freed and
55 we need to reopen the file after it was closed implicitly 55 we need to reopen the file after it was closed implicitly
56 when the server crashed */ 56 when the server crashed */
57 57
58 dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
58cifs_bp_rename_retry: 59cifs_bp_rename_retry:
59 for (temp = direntry; !IS_ROOT(temp);) { 60 for (temp = direntry; !IS_ROOT(temp);) {
60 namelen += (1 + temp->d_name.len); 61 namelen += (1 + temp->d_name.len);
@@ -255,12 +256,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
255 CIFSSMBClose(xid, pTcon, fileHandle); 256 CIFSSMBClose(xid, pTcon, fileHandle);
256 } else if(newinode) { 257 } else if(newinode) {
257 pCifsFile = 258 pCifsFile =
258 kmalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); 259 kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
259 260
260 if(pCifsFile == NULL) 261 if(pCifsFile == NULL)
261 goto cifs_create_out; 262 goto cifs_create_out;
262 memset((char *)pCifsFile, 0,
263 sizeof (struct cifsFileInfo));
264 pCifsFile->netfid = fileHandle; 263 pCifsFile->netfid = fileHandle;
265 pCifsFile->pid = current->tgid; 264 pCifsFile->pid = current->tgid;
266 pCifsFile->pInode = newinode; 265 pCifsFile->pInode = newinode;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fb49aef1f2ec..5c497c529772 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -555,7 +555,10 @@ int cifs_closedir(struct inode *inode, struct file *file)
555 if (ptmp) { 555 if (ptmp) {
556 cFYI(1, ("closedir free smb buf in srch struct")); 556 cFYI(1, ("closedir free smb buf in srch struct"));
557 pCFileStruct->srch_inf.ntwrk_buf_start = NULL; 557 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
558 cifs_buf_release(ptmp); 558 if(pCFileStruct->srch_inf.smallBuf)
559 cifs_small_buf_release(ptmp);
560 else
561 cifs_buf_release(ptmp);
559 } 562 }
560 ptmp = pCFileStruct->search_resume_name; 563 ptmp = pCFileStruct->search_resume_name;
561 if (ptmp) { 564 if (ptmp) {
@@ -574,13 +577,14 @@ int cifs_closedir(struct inode *inode, struct file *file)
574int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) 577int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
575{ 578{
576 int rc, xid; 579 int rc, xid;
577 __u32 lockType = LOCKING_ANDX_LARGE_FILES;
578 __u32 numLock = 0; 580 __u32 numLock = 0;
579 __u32 numUnlock = 0; 581 __u32 numUnlock = 0;
580 __u64 length; 582 __u64 length;
581 int wait_flag = FALSE; 583 int wait_flag = FALSE;
582 struct cifs_sb_info *cifs_sb; 584 struct cifs_sb_info *cifs_sb;
583 struct cifsTconInfo *pTcon; 585 struct cifsTconInfo *pTcon;
586 __u16 netfid;
587 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
584 588
585 length = 1 + pfLock->fl_end - pfLock->fl_start; 589 length = 1 + pfLock->fl_end - pfLock->fl_start;
586 rc = -EACCES; 590 rc = -EACCES;
@@ -592,11 +596,11 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
592 pfLock->fl_end)); 596 pfLock->fl_end));
593 597
594 if (pfLock->fl_flags & FL_POSIX) 598 if (pfLock->fl_flags & FL_POSIX)
595 cFYI(1, ("Posix ")); 599 cFYI(1, ("Posix"));
596 if (pfLock->fl_flags & FL_FLOCK) 600 if (pfLock->fl_flags & FL_FLOCK)
597 cFYI(1, ("Flock ")); 601 cFYI(1, ("Flock"));
598 if (pfLock->fl_flags & FL_SLEEP) { 602 if (pfLock->fl_flags & FL_SLEEP) {
599 cFYI(1, ("Blocking lock ")); 603 cFYI(1, ("Blocking lock"));
600 wait_flag = TRUE; 604 wait_flag = TRUE;
601 } 605 }
602 if (pfLock->fl_flags & FL_ACCESS) 606 if (pfLock->fl_flags & FL_ACCESS)
@@ -612,21 +616,23 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
612 cFYI(1, ("F_WRLCK ")); 616 cFYI(1, ("F_WRLCK "));
613 numLock = 1; 617 numLock = 1;
614 } else if (pfLock->fl_type == F_UNLCK) { 618 } else if (pfLock->fl_type == F_UNLCK) {
615 cFYI(1, ("F_UNLCK ")); 619 cFYI(1, ("F_UNLCK"));
616 numUnlock = 1; 620 numUnlock = 1;
621 /* Check if unlock includes more than
622 one lock range */
617 } else if (pfLock->fl_type == F_RDLCK) { 623 } else if (pfLock->fl_type == F_RDLCK) {
618 cFYI(1, ("F_RDLCK ")); 624 cFYI(1, ("F_RDLCK"));
619 lockType |= LOCKING_ANDX_SHARED_LOCK; 625 lockType |= LOCKING_ANDX_SHARED_LOCK;
620 numLock = 1; 626 numLock = 1;
621 } else if (pfLock->fl_type == F_EXLCK) { 627 } else if (pfLock->fl_type == F_EXLCK) {
622 cFYI(1, ("F_EXLCK ")); 628 cFYI(1, ("F_EXLCK"));
623 numLock = 1; 629 numLock = 1;
624 } else if (pfLock->fl_type == F_SHLCK) { 630 } else if (pfLock->fl_type == F_SHLCK) {
625 cFYI(1, ("F_SHLCK ")); 631 cFYI(1, ("F_SHLCK"));
626 lockType |= LOCKING_ANDX_SHARED_LOCK; 632 lockType |= LOCKING_ANDX_SHARED_LOCK;
627 numLock = 1; 633 numLock = 1;
628 } else 634 } else
629 cFYI(1, ("Unknown type of lock ")); 635 cFYI(1, ("Unknown type of lock"));
630 636
631 cifs_sb = CIFS_SB(file->f_dentry->d_sb); 637 cifs_sb = CIFS_SB(file->f_dentry->d_sb);
632 pTcon = cifs_sb->tcon; 638 pTcon = cifs_sb->tcon;
@@ -635,27 +641,41 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
635 FreeXid(xid); 641 FreeXid(xid);
636 return -EBADF; 642 return -EBADF;
637 } 643 }
644 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
645
638 646
647 /* BB add code here to normalize offset and length to
648 account for negative length which we can not accept over the
649 wire */
639 if (IS_GETLK(cmd)) { 650 if (IS_GETLK(cmd)) {
640 rc = CIFSSMBLock(xid, pTcon, 651 if(experimEnabled &&
641 ((struct cifsFileInfo *)file-> 652 (cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
642 private_data)->netfid, 653 (CIFS_UNIX_FCNTL_CAP &
643 length, 654 le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) {
644 pfLock->fl_start, 0, 1, lockType, 655 int posix_lock_type;
645 0 /* wait flag */ ); 656 if(lockType & LOCKING_ANDX_SHARED_LOCK)
657 posix_lock_type = CIFS_RDLCK;
658 else
659 posix_lock_type = CIFS_WRLCK;
660 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 1 /* get */,
661 length, pfLock->fl_start,
662 posix_lock_type, wait_flag);
663 FreeXid(xid);
664 return rc;
665 }
666
667 /* BB we could chain these into one lock request BB */
668 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start,
669 0, 1, lockType, 0 /* wait flag */ );
646 if (rc == 0) { 670 if (rc == 0) {
647 rc = CIFSSMBLock(xid, pTcon, 671 rc = CIFSSMBLock(xid, pTcon, netfid, length,
648 ((struct cifsFileInfo *) file->
649 private_data)->netfid,
650 length,
651 pfLock->fl_start, 1 /* numUnlock */ , 672 pfLock->fl_start, 1 /* numUnlock */ ,
652 0 /* numLock */ , lockType, 673 0 /* numLock */ , lockType,
653 0 /* wait flag */ ); 674 0 /* wait flag */ );
654 pfLock->fl_type = F_UNLCK; 675 pfLock->fl_type = F_UNLCK;
655 if (rc != 0) 676 if (rc != 0)
656 cERROR(1, ("Error unlocking previously locked " 677 cERROR(1, ("Error unlocking previously locked "
657 "range %d during test of lock ", 678 "range %d during test of lock", rc));
658 rc));
659 rc = 0; 679 rc = 0;
660 680
661 } else { 681 } else {
@@ -667,12 +687,30 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
667 FreeXid(xid); 687 FreeXid(xid);
668 return rc; 688 return rc;
669 } 689 }
670 690 if (experimEnabled &&
671 rc = CIFSSMBLock(xid, pTcon, 691 (cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
672 ((struct cifsFileInfo *) file->private_data)-> 692 (CIFS_UNIX_FCNTL_CAP &
673 netfid, length, 693 le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) {
674 pfLock->fl_start, numUnlock, numLock, lockType, 694 int posix_lock_type;
675 wait_flag); 695 if(lockType & LOCKING_ANDX_SHARED_LOCK)
696 posix_lock_type = CIFS_RDLCK;
697 else
698 posix_lock_type = CIFS_WRLCK;
699
700 if(numUnlock == 1)
701 posix_lock_type = CIFS_UNLCK;
702 else if(numLock == 0) {
703 /* if no lock or unlock then nothing
704 to do since we do not know what it is */
705 FreeXid(xid);
706 return -EOPNOTSUPP;
707 }
708 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */,
709 length, pfLock->fl_start,
710 posix_lock_type, wait_flag);
711 } else
712 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start,
713 numUnlock, numLock, lockType, wait_flag);
676 if (pfLock->fl_flags & FL_POSIX) 714 if (pfLock->fl_flags & FL_POSIX)
677 posix_lock_file_wait(file, pfLock); 715 posix_lock_file_wait(file, pfLock);
678 FreeXid(xid); 716 FreeXid(xid);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 598eec9778f6..957ddd1571c6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -565,11 +565,14 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
565 struct cifsInodeInfo *cifsInode; 565 struct cifsInodeInfo *cifsInode;
566 FILE_BASIC_INFO *pinfo_buf; 566 FILE_BASIC_INFO *pinfo_buf;
567 567
568 cFYI(1, ("cifs_unlink, inode = 0x%p with ", inode)); 568 cFYI(1, ("cifs_unlink, inode = 0x%p", inode));
569 569
570 xid = GetXid(); 570 xid = GetXid();
571 571
572 cifs_sb = CIFS_SB(inode->i_sb); 572 if(inode)
573 cifs_sb = CIFS_SB(inode->i_sb);
574 else
575 cifs_sb = CIFS_SB(direntry->d_sb);
573 pTcon = cifs_sb->tcon; 576 pTcon = cifs_sb->tcon;
574 577
575 /* Unlink can be called from rename so we can not grab the sem here 578 /* Unlink can be called from rename so we can not grab the sem here
@@ -609,9 +612,8 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
609 } 612 }
610 } else if (rc == -EACCES) { 613 } else if (rc == -EACCES) {
611 /* try only if r/o attribute set in local lookup data? */ 614 /* try only if r/o attribute set in local lookup data? */
612 pinfo_buf = kmalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL); 615 pinfo_buf = kzalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL);
613 if (pinfo_buf) { 616 if (pinfo_buf) {
614 memset(pinfo_buf, 0, sizeof(FILE_BASIC_INFO));
615 /* ATTRS set to normal clears r/o bit */ 617 /* ATTRS set to normal clears r/o bit */
616 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); 618 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
617 if (!(pTcon->ses->flags & CIFS_SES_NT4)) 619 if (!(pTcon->ses->flags & CIFS_SES_NT4))
@@ -693,9 +695,11 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
693 when needed */ 695 when needed */
694 direntry->d_inode->i_ctime = current_fs_time(inode->i_sb); 696 direntry->d_inode->i_ctime = current_fs_time(inode->i_sb);
695 } 697 }
696 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); 698 if(inode) {
697 cifsInode = CIFS_I(inode); 699 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
698 cifsInode->time = 0; /* force revalidate of dir as well */ 700 cifsInode = CIFS_I(inode);
701 cifsInode->time = 0; /* force revalidate of dir as well */
702 }
699 703
700 kfree(full_path); 704 kfree(full_path);
701 FreeXid(xid); 705 FreeXid(xid);
@@ -1167,7 +1171,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1167 nfid, npid, FALSE); 1171 nfid, npid, FALSE);
1168 atomic_dec(&open_file->wrtPending); 1172 atomic_dec(&open_file->wrtPending);
1169 cFYI(1,("SetFSize for attrs rc = %d", rc)); 1173 cFYI(1,("SetFSize for attrs rc = %d", rc));
1170 if(rc == -EINVAL) { 1174 if((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1171 int bytes_written; 1175 int bytes_written;
1172 rc = CIFSSMBWrite(xid, pTcon, 1176 rc = CIFSSMBWrite(xid, pTcon,
1173 nfid, 0, attrs->ia_size, 1177 nfid, 0, attrs->ia_size,
@@ -1189,7 +1193,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1189 cifs_sb->mnt_cifs_flags & 1193 cifs_sb->mnt_cifs_flags &
1190 CIFS_MOUNT_MAP_SPECIAL_CHR); 1194 CIFS_MOUNT_MAP_SPECIAL_CHR);
1191 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); 1195 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1192 if(rc == -EINVAL) { 1196 if((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1193 __u16 netfid; 1197 __u16 netfid;
1194 int oplock = FALSE; 1198 int oplock = FALSE;
1195 1199
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 8d0da7c87c7b..9562f5bba65c 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -67,7 +67,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
67 cifs_sb_target->local_nls, 67 cifs_sb_target->local_nls,
68 cifs_sb_target->mnt_cifs_flags & 68 cifs_sb_target->mnt_cifs_flags &
69 CIFS_MOUNT_MAP_SPECIAL_CHR); 69 CIFS_MOUNT_MAP_SPECIAL_CHR);
70 if(rc == -EIO) 70 if((rc == -EIO) || (rc == -EINVAL))
71 rc = -EOPNOTSUPP; 71 rc = -EOPNOTSUPP;
72 } 72 }
73 73
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 432ba15e2c2d..fafd056426e4 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -72,10 +72,9 @@ sesInfoAlloc(void)
72 struct cifsSesInfo *ret_buf; 72 struct cifsSesInfo *ret_buf;
73 73
74 ret_buf = 74 ret_buf =
75 (struct cifsSesInfo *) kmalloc(sizeof (struct cifsSesInfo), 75 (struct cifsSesInfo *) kzalloc(sizeof (struct cifsSesInfo),
76 GFP_KERNEL); 76 GFP_KERNEL);
77 if (ret_buf) { 77 if (ret_buf) {
78 memset(ret_buf, 0, sizeof (struct cifsSesInfo));
79 write_lock(&GlobalSMBSeslock); 78 write_lock(&GlobalSMBSeslock);
80 atomic_inc(&sesInfoAllocCount); 79 atomic_inc(&sesInfoAllocCount);
81 ret_buf->status = CifsNew; 80 ret_buf->status = CifsNew;
@@ -110,10 +109,9 @@ tconInfoAlloc(void)
110{ 109{
111 struct cifsTconInfo *ret_buf; 110 struct cifsTconInfo *ret_buf;
112 ret_buf = 111 ret_buf =
113 (struct cifsTconInfo *) kmalloc(sizeof (struct cifsTconInfo), 112 (struct cifsTconInfo *) kzalloc(sizeof (struct cifsTconInfo),
114 GFP_KERNEL); 113 GFP_KERNEL);
115 if (ret_buf) { 114 if (ret_buf) {
116 memset(ret_buf, 0, sizeof (struct cifsTconInfo));
117 write_lock(&GlobalSMBSeslock); 115 write_lock(&GlobalSMBSeslock);
118 atomic_inc(&tconInfoAllocCount); 116 atomic_inc(&tconInfoAllocCount);
119 list_add(&ret_buf->cifsConnectionList, 117 list_add(&ret_buf->cifsConnectionList,
@@ -423,9 +421,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, int length)
423{ 421{
424 __u32 len = smb->smb_buf_length; 422 __u32 len = smb->smb_buf_length;
425 __u32 clc_len; /* calculated length */ 423 __u32 clc_len; /* calculated length */
426 cFYI(0, 424 cFYI(0, ("checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len));
427 ("Entering checkSMB with Length: %x, smb_buf_length: %x",
428 length, len));
429 if (((unsigned int)length < 2 + sizeof (struct smb_hdr)) || 425 if (((unsigned int)length < 2 + sizeof (struct smb_hdr)) ||
430 (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)) { 426 (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)) {
431 if ((unsigned int)length < 2 + sizeof (struct smb_hdr)) { 427 if ((unsigned int)length < 2 + sizeof (struct smb_hdr)) {
@@ -433,29 +429,36 @@ checkSMB(struct smb_hdr *smb, __u16 mid, int length)
433 sizeof (struct smb_hdr) - 1) 429 sizeof (struct smb_hdr) - 1)
434 && (smb->Status.CifsError != 0)) { 430 && (smb->Status.CifsError != 0)) {
435 smb->WordCount = 0; 431 smb->WordCount = 0;
436 return 0; /* some error cases do not return wct and bcc */ 432 /* some error cases do not return wct and bcc */
433 return 0;
437 } else { 434 } else {
438 cERROR(1, ("Length less than smb header size")); 435 cERROR(1, ("Length less than smb header size"));
439 } 436 }
440
441 } 437 }
442 if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) 438 if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)
443 cERROR(1, 439 cERROR(1, ("smb length greater than MaxBufSize, mid=%d",
444 ("smb_buf_length greater than MaxBufSize")); 440 smb->Mid));
445 cERROR(1,
446 ("bad smb detected. Illegal length. mid=%d",
447 smb->Mid));
448 return 1; 441 return 1;
449 } 442 }
450 443
451 if (checkSMBhdr(smb, mid)) 444 if (checkSMBhdr(smb, mid))
452 return 1; 445 return 1;
453 clc_len = smbCalcSize_LE(smb); 446 clc_len = smbCalcSize_LE(smb);
454 if ((4 + len != clc_len) 447
455 || (4 + len != (unsigned int)length)) { 448 if(4 + len != (unsigned int)length) {
456 cERROR(1, ("Calculated size 0x%x vs actual length 0x%x", 449 cERROR(1, ("Length read does not match RFC1001 length %d",len));
457 clc_len, 4 + len)); 450 return 1;
458 cERROR(1, ("bad smb size detected for Mid=%d", smb->Mid)); 451 }
452
453 if (4 + len != clc_len) {
454 /* check if bcc wrapped around for large read responses */
455 if((len > 64 * 1024) && (len > clc_len)) {
456 /* check if lengths match mod 64K */
457 if(((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
458 return 0; /* bcc wrapped */
459 }
460 cFYI(1, ("Calculated size %d vs length %d mismatch for mid %d",
461 clc_len, 4 + len, smb->Mid));
459 /* Windows XP can return a few bytes too much, presumably 462 /* Windows XP can return a few bytes too much, presumably
460 an illegal pad, at the end of byte range lock responses 463 an illegal pad, at the end of byte range lock responses
461 so we allow for that three byte pad, as long as actual 464 so we allow for that three byte pad, as long as actual
@@ -469,8 +472,11 @@ checkSMB(struct smb_hdr *smb, __u16 mid, int length)
469 wct and bcc to minimum size and drop the t2 parms and data */ 472 wct and bcc to minimum size and drop the t2 parms and data */
470 if((4+len > clc_len) && (len <= clc_len + 512)) 473 if((4+len > clc_len) && (len <= clc_len + 512))
471 return 0; 474 return 0;
472 else 475 else {
476 cERROR(1, ("RFC1001 size %d bigger than SMB for Mid=%d",
477 len, smb->Mid));
473 return 1; 478 return 1;
479 }
474 } 480 }
475 return 0; 481 return 0;
476} 482}
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
new file mode 100644
index 000000000000..78866f925747
--- /dev/null
+++ b/fs/cifs/ntlmssp.c
@@ -0,0 +1,129 @@
1/*
2 * fs/cifs/ntlmssp.c
3 *
4 * Copyright (c) International Business Machines Corp., 2006
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include "cifspdu.h"
23#include "cifsglob.h"
24#include "cifsproto.h"
25#include "cifs_unicode.h"
26#include "cifs_debug.h"
27#include "ntlmssp.h"
28#include "nterr.h"
29
30#ifdef CONFIG_CIFS_EXPERIMENTAL
31static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
32{
33 __u32 capabilities = 0;
34
35 /* init fields common to all four types of SessSetup */
36 /* note that header is initialized to zero in header_assemble */
37 pSMB->req.AndXCommand = 0xFF;
38 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
39 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
40
41 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
42
43 /* BB verify whether signing required on neg or just on auth frame
44 (and NTLM case) */
45
46 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
47 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
48
49 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
50 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
51
52 if (ses->capabilities & CAP_UNICODE) {
53 pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
54 capabilities |= CAP_UNICODE;
55 }
56 if (ses->capabilities & CAP_STATUS32) {
57 pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
58 capabilities |= CAP_STATUS32;
59 }
60 if (ses->capabilities & CAP_DFS) {
61 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
62 capabilities |= CAP_DFS;
63 }
64
65 /* BB check whether to init vcnum BB */
66 return capabilities;
67}
68int
69CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
70 int * pNTLMv2_flg, const struct nls_table *nls_cp)
71{
72 int rc = 0;
73 int wct;
74 struct smb_hdr *smb_buffer;
75 char *bcc_ptr;
76 SESSION_SETUP_ANDX *pSMB;
77 __u32 capabilities;
78
79 if(ses == NULL)
80 return -EINVAL;
81
82 cFYI(1,("SStp type: %d",type));
83 if(type < CIFS_NTLM) {
84#ifndef CONFIG_CIFS_WEAK_PW_HASH
85 /* LANMAN and plaintext are less secure and off by default.
86 So we make this explicitly be turned on in kconfig (in the
87 build) and turned on at runtime (changed from the default)
88 in proc/fs/cifs or via mount parm. Unfortunately this is
89 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
90 return -EOPNOTSUPP;
91#endif
92 wct = 10; /* lanman 2 style sessionsetup */
93 } else if(type < CIFS_NTLMSSP_NEG)
94 wct = 13; /* old style NTLM sessionsetup */
95 else /* same size for negotiate or auth, NTLMSSP or extended security */
96 wct = 12;
97
98 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
99 (void **)&smb_buffer);
100 if(rc)
101 return rc;
102
103 pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
104
105 capabilities = cifs_ssetup_hdr(ses, pSMB);
106 bcc_ptr = pByteArea(smb_buffer);
107 if(type > CIFS_NTLM) {
108 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
109 capabilities |= CAP_EXTENDED_SECURITY;
110 pSMB->req.Capabilities = cpu_to_le32(capabilities);
111 /* BB set password lengths */
112 } else if(type < CIFS_NTLM) /* lanman */ {
113 /* no capabilities flags in old lanman negotiation */
114 /* pSMB->old_req.PasswordLength = */ /* BB fixme BB */
115 } else /* type CIFS_NTLM */ {
116 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
117 pSMB->req_no_secext.CaseInsensitivePasswordLength =
118 cpu_to_le16(CIFS_SESSION_KEY_SIZE);
119 pSMB->req_no_secext.CaseSensitivePasswordLength =
120 cpu_to_le16(CIFS_SESSION_KEY_SIZE);
121 }
122
123
124/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
125 /* SMB request buf freed in SendReceive2 */
126
127 return rc;
128}
129#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 803389b64a2c..d39b712a11c5 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/ntlmssp.h 2 * fs/cifs/ntlmssp.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002 4 * Copyright (c) International Business Machines Corp., 2002,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 488bd0d81dcf..2f6e2825571e 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -604,7 +604,12 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
604 cifsFile->search_resume_name = NULL; 604 cifsFile->search_resume_name = NULL;
605 if(cifsFile->srch_inf.ntwrk_buf_start) { 605 if(cifsFile->srch_inf.ntwrk_buf_start) {
606 cFYI(1,("freeing SMB ff cache buf on search rewind")); 606 cFYI(1,("freeing SMB ff cache buf on search rewind"));
607 cifs_buf_release(cifsFile->srch_inf.ntwrk_buf_start); 607 if(cifsFile->srch_inf.smallBuf)
608 cifs_small_buf_release(cifsFile->srch_inf.
609 ntwrk_buf_start);
610 else
611 cifs_buf_release(cifsFile->srch_inf.
612 ntwrk_buf_start);
608 } 613 }
609 rc = initiate_cifs_search(xid,file); 614 rc = initiate_cifs_search(xid,file);
610 if(rc) { 615 if(rc) {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b12cb8a7da7c..3da80409466c 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -309,17 +309,16 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
309 309
310 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ 310 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */
311 311
312 if (ses == NULL) { 312 if ((ses == NULL) || (ses->server == NULL)) {
313 cERROR(1,("Null smb session")); 313 cifs_small_buf_release(in_buf);
314 return -EIO; 314 cERROR(1,("Null session"));
315 }
316 if(ses->server == NULL) {
317 cERROR(1,("Null tcp session"));
318 return -EIO; 315 return -EIO;
319 } 316 }
320 317
321 if(ses->server->tcpStatus == CifsExiting) 318 if(ses->server->tcpStatus == CifsExiting) {
319 cifs_small_buf_release(in_buf);
322 return -ENOENT; 320 return -ENOENT;
321 }
323 322
324 /* Ensure that we do not send more than 50 overlapping requests 323 /* Ensure that we do not send more than 50 overlapping requests
325 to the same server. We may make this configurable later or 324 to the same server. We may make this configurable later or
@@ -346,6 +345,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
346 } else { 345 } else {
347 if(ses->server->tcpStatus == CifsExiting) { 346 if(ses->server->tcpStatus == CifsExiting) {
348 spin_unlock(&GlobalMid_Lock); 347 spin_unlock(&GlobalMid_Lock);
348 cifs_small_buf_release(in_buf);
349 return -ENOENT; 349 return -ENOENT;
350 } 350 }
351 351
@@ -385,6 +385,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
385 midQ = AllocMidQEntry(in_buf, ses); 385 midQ = AllocMidQEntry(in_buf, ses);
386 if (midQ == NULL) { 386 if (midQ == NULL) {
387 up(&ses->server->tcpSem); 387 up(&ses->server->tcpSem);
388 cifs_small_buf_release(in_buf);
388 /* If not lock req, update # of requests on wire to server */ 389 /* If not lock req, update # of requests on wire to server */
389 if(long_op < 3) { 390 if(long_op < 3) {
390 atomic_dec(&ses->server->inFlight); 391 atomic_dec(&ses->server->inFlight);
@@ -408,14 +409,18 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
408 if(rc < 0) { 409 if(rc < 0) {
409 DeleteMidQEntry(midQ); 410 DeleteMidQEntry(midQ);
410 up(&ses->server->tcpSem); 411 up(&ses->server->tcpSem);
412 cifs_small_buf_release(in_buf);
411 /* If not lock req, update # of requests on wire to server */ 413 /* If not lock req, update # of requests on wire to server */
412 if(long_op < 3) { 414 if(long_op < 3) {
413 atomic_dec(&ses->server->inFlight); 415 atomic_dec(&ses->server->inFlight);
414 wake_up(&ses->server->request_q); 416 wake_up(&ses->server->request_q);
415 } 417 }
416 return rc; 418 return rc;
417 } else 419 } else {
418 up(&ses->server->tcpSem); 420 up(&ses->server->tcpSem);
421 cifs_small_buf_release(in_buf);
422 }
423
419 if (long_op == -1) 424 if (long_op == -1)
420 goto cifs_no_response_exit2; 425 goto cifs_no_response_exit2;
421 else if (long_op == 2) /* writes past end of file can take loong time */ 426 else if (long_op == 2) /* writes past end of file can take loong time */
@@ -543,6 +548,7 @@ cifs_no_response_exit2:
543 548
544out_unlock2: 549out_unlock2:
545 up(&ses->server->tcpSem); 550 up(&ses->server->tcpSem);
551 cifs_small_buf_release(in_buf);
546 /* If not lock req, update # of requests on wire to server */ 552 /* If not lock req, update # of requests on wire to server */
547 if(long_op < 3) { 553 if(long_op < 3) {
548 atomic_dec(&ses->server->inFlight); 554 atomic_dec(&ses->server->inFlight);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06a9828..5638c8f9362f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
504 int ret = 0; 504 int ret = 0;
505 int i; 505 int i;
506 506
507 if (group && group->default_groups) { 507 if (group->default_groups) {
508 /* FYI, we're faking mkdir here 508 /* FYI, we're faking mkdir here
509 * I'm not sure we need this semaphore, as we're called 509 * I'm not sure we need this semaphore, as we're called
510 * from our parent's mkdir. That holds our parent's 510 * from our parent's mkdir. That holds our parent's
diff --git a/fs/dcache.c b/fs/dcache.c
index 19458d399502..940d188e5d14 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1101,6 +1101,32 @@ next:
1101} 1101}
1102 1102
1103/** 1103/**
1104 * d_hash_and_lookup - hash the qstr then search for a dentry
1105 * @dir: Directory to search in
1106 * @name: qstr of name we wish to find
1107 *
1108 * On hash failure or on lookup failure NULL is returned.
1109 */
1110struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1111{
1112 struct dentry *dentry = NULL;
1113
1114 /*
1115 * Check for a fs-specific hash function. Note that we must
1116 * calculate the standard hash first, as the d_op->d_hash()
1117 * routine may choose to leave the hash value unchanged.
1118 */
1119 name->hash = full_name_hash(name->name, name->len);
1120 if (dir->d_op && dir->d_op->d_hash) {
1121 if (dir->d_op->d_hash(dir, name) < 0)
1122 goto out;
1123 }
1124 dentry = d_lookup(dir, name);
1125out:
1126 return dentry;
1127}
1128
1129/**
1104 * d_validate - verify dentry provided from insecure source 1130 * d_validate - verify dentry provided from insecure source
1105 * @dentry: The dentry alleged to be valid child of @dparent 1131 * @dentry: The dentry alleged to be valid child of @dparent
1106 * @dparent: The parent dentry (known to be valid) 1132 * @dparent: The parent dentry (known to be valid)
@@ -1172,11 +1198,11 @@ void d_delete(struct dentry * dentry)
1172 spin_lock(&dentry->d_lock); 1198 spin_lock(&dentry->d_lock);
1173 isdir = S_ISDIR(dentry->d_inode->i_mode); 1199 isdir = S_ISDIR(dentry->d_inode->i_mode);
1174 if (atomic_read(&dentry->d_count) == 1) { 1200 if (atomic_read(&dentry->d_count) == 1) {
1175 /* remove this and other inotify debug checks after 2.6.18 */
1176 dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
1177
1178 dentry_iput(dentry); 1201 dentry_iput(dentry);
1179 fsnotify_nameremove(dentry, isdir); 1202 fsnotify_nameremove(dentry, isdir);
1203
1204 /* remove this and other inotify debug checks after 2.6.18 */
1205 dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
1180 return; 1206 return;
1181 } 1207 }
1182 1208
@@ -1616,26 +1642,12 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
1616 struct dentry * dentry; 1642 struct dentry * dentry;
1617 ino_t ino = 0; 1643 ino_t ino = 0;
1618 1644
1619 /* 1645 dentry = d_hash_and_lookup(dir, name);
1620 * Check for a fs-specific hash function. Note that we must 1646 if (dentry) {
1621 * calculate the standard hash first, as the d_op->d_hash()
1622 * routine may choose to leave the hash value unchanged.
1623 */
1624 name->hash = full_name_hash(name->name, name->len);
1625 if (dir->d_op && dir->d_op->d_hash)
1626 {
1627 if (dir->d_op->d_hash(dir, name) != 0)
1628 goto out;
1629 }
1630
1631 dentry = d_lookup(dir, name);
1632 if (dentry)
1633 {
1634 if (dentry->d_inode) 1647 if (dentry->d_inode)
1635 ino = dentry->d_inode->i_ino; 1648 ino = dentry->d_inode->i_ino;
1636 dput(dentry); 1649 dput(dentry);
1637 } 1650 }
1638out:
1639 return ino; 1651 return ino;
1640} 1652}
1641 1653
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 910a8ed74b5d..b05d1b218776 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -929,8 +929,7 @@ do_holes:
929 block_in_page += this_chunk_blocks; 929 block_in_page += this_chunk_blocks;
930 dio->blocks_available -= this_chunk_blocks; 930 dio->blocks_available -= this_chunk_blocks;
931next_block: 931next_block:
932 if (dio->block_in_file > dio->final_block_in_request) 932 BUG_ON(dio->block_in_file > dio->final_block_in_request);
933 BUG();
934 if (dio->block_in_file == dio->final_block_in_request) 933 if (dio->block_in_file == dio->final_block_in_request)
935 break; 934 break;
936 } 935 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 6b3886920939..81d87a413c68 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -590,8 +590,7 @@ we_slept:
590 atomic_dec(&dquot->dq_count); 590 atomic_dec(&dquot->dq_count);
591#ifdef __DQUOT_PARANOIA 591#ifdef __DQUOT_PARANOIA
592 /* sanity check */ 592 /* sanity check */
593 if (!list_empty(&dquot->dq_free)) 593 BUG_ON(!list_empty(&dquot->dq_free));
594 BUG();
595#endif 594#endif
596 put_dquot_last(dquot); 595 put_dquot_last(dquot);
597 spin_unlock(&dq_list_lock); 596 spin_unlock(&dq_list_lock);
@@ -666,8 +665,7 @@ we_slept:
666 return NODQUOT; 665 return NODQUOT;
667 } 666 }
668#ifdef __DQUOT_PARANOIA 667#ifdef __DQUOT_PARANOIA
669 if (!dquot->dq_sb) /* Has somebody invalidated entry under us? */ 668 BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */
670 BUG();
671#endif 669#endif
672 670
673 return dquot; 671 return dquot;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 242fe1a66ce5..1b4491cdd115 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
599 switch (op) { 599 switch (op) {
600 case EPOLL_CTL_ADD: 600 case EPOLL_CTL_ADD:
601 if (!epi) { 601 if (!epi) {
602 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 602 epds.events |= POLLERR | POLLHUP;
603 603
604 error = ep_insert(ep, &epds, tfile, fd); 604 error = ep_insert(ep, &epds, tfile, fd);
605 } else 605 } else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
613 break; 613 break;
614 case EPOLL_CTL_MOD: 614 case EPOLL_CTL_MOD:
615 if (epi) { 615 if (epi) {
616 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 616 epds.events |= POLLERR | POLLHUP;
617 error = ep_modify(ep, epi, &epds); 617 error = ep_modify(ep, epi, &epds);
618 } else 618 } else
619 error = -ENOENT; 619 error = -ENOENT;
diff --git a/fs/exec.c b/fs/exec.c
index 950ebd43cdc3..3234a0c32d54 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -561,7 +561,7 @@ static int exec_mmap(struct mm_struct *mm)
561 arch_pick_mmap_layout(mm); 561 arch_pick_mmap_layout(mm);
562 if (old_mm) { 562 if (old_mm) {
563 up_read(&old_mm->mmap_sem); 563 up_read(&old_mm->mmap_sem);
564 if (active_mm != old_mm) BUG(); 564 BUG_ON(active_mm != old_mm);
565 mmput(old_mm); 565 mmput(old_mm);
566 return 0; 566 return 0;
567 } 567 }
@@ -678,6 +678,18 @@ static int de_thread(struct task_struct *tsk)
678 while (leader->exit_state != EXIT_ZOMBIE) 678 while (leader->exit_state != EXIT_ZOMBIE)
679 yield(); 679 yield();
680 680
681 /*
682 * The only record we have of the real-time age of a
683 * process, regardless of execs it's done, is start_time.
684 * All the past CPU time is accumulated in signal_struct
685 * from sister threads now dead. But in this non-leader
686 * exec, nothing survives from the original leader thread,
687 * whose birth marks the true age of this process now.
688 * When we take on its identity by switching to its PID, we
689 * also take its birthdate (always earlier than our own).
690 */
691 current->start_time = leader->start_time;
692
681 spin_lock(&leader->proc_lock); 693 spin_lock(&leader->proc_lock);
682 spin_lock(&current->proc_lock); 694 spin_lock(&current->proc_lock);
683 proc_dentry1 = proc_pid_unhash(current); 695 proc_dentry1 = proc_pid_unhash(current);
@@ -723,7 +735,12 @@ static int de_thread(struct task_struct *tsk)
723 current->parent = current->real_parent = leader->real_parent; 735 current->parent = current->real_parent = leader->real_parent;
724 leader->parent = leader->real_parent = child_reaper; 736 leader->parent = leader->real_parent = child_reaper;
725 current->group_leader = current; 737 current->group_leader = current;
726 leader->group_leader = leader; 738 leader->group_leader = current;
739
740 /* Reduce leader to a thread */
741 detach_pid(leader, PIDTYPE_PGID);
742 detach_pid(leader, PIDTYPE_SID);
743 list_del_init(&leader->tasks);
727 744
728 add_parent(current); 745 add_parent(current);
729 add_parent(leader); 746 add_parent(leader);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 509cceca04db..23e2c7ccec1d 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,6 +53,8 @@ const struct file_operations ext2_file_operations = {
53 .readv = generic_file_readv, 53 .readv = generic_file_readv,
54 .writev = generic_file_writev, 54 .writev = generic_file_writev,
55 .sendfile = generic_file_sendfile, 55 .sendfile = generic_file_sendfile,
56 .splice_read = generic_file_splice_read,
57 .splice_write = generic_file_splice_write,
56}; 58};
57 59
58#ifdef CONFIG_EXT2_FS_XIP 60#ifdef CONFIG_EXT2_FS_XIP
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 783a796220bb..1efefb630ea9 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -119,6 +119,8 @@ const struct file_operations ext3_file_operations = {
119 .release = ext3_release_file, 119 .release = ext3_release_file,
120 .fsync = ext3_sync_file, 120 .fsync = ext3_sync_file,
121 .sendfile = generic_file_sendfile, 121 .sendfile = generic_file_sendfile,
122 .splice_read = generic_file_splice_read,
123 .splice_write = generic_file_splice_write,
122}; 124};
123 125
124struct inode_operations ext3_file_inode_operations = { 126struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 1041dab6de2f..14f5f6ea3e72 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
975 ext3_warning(sb, __FUNCTION__, 975 ext3_warning(sb, __FUNCTION__,
976 "multiple resizers run on filesystem!"); 976 "multiple resizers run on filesystem!");
977 unlock_super(sb);
977 err = -EBUSY; 978 err = -EBUSY;
978 goto exit_put; 979 goto exit_put;
979 } 980 }
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2a2479196f96..d35cbc6bc112 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -453,8 +453,7 @@ static void send_sigio_to_task(struct task_struct *p,
453 /* Make sure we are called with one of the POLL_* 453 /* Make sure we are called with one of the POLL_*
454 reasons, otherwise we could leak kernel stack into 454 reasons, otherwise we could leak kernel stack into
455 userspace. */ 455 userspace. */
456 if ((reason & __SI_MASK) != __SI_POLL) 456 BUG_ON((reason & __SI_MASK) != __SI_POLL);
457 BUG();
458 if (reason - POLL_IN >= NSIGPOLL) 457 if (reason - POLL_IN >= NSIGPOLL)
459 si.si_band = ~0L; 458 si.si_band = ~0L;
460 else 459 else
diff --git a/fs/fifo.c b/fs/fifo.c
index 889f722ee36d..49035b174b48 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -15,30 +15,35 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
17 17
18static void wait_for_partner(struct inode* inode, unsigned int* cnt) 18static void wait_for_partner(struct inode* inode, unsigned int *cnt)
19{ 19{
20 int cur = *cnt; 20 int cur = *cnt;
21 while(cur == *cnt) { 21
22 pipe_wait(inode); 22 while (cur == *cnt) {
23 if(signal_pending(current)) 23 pipe_wait(inode->i_pipe);
24 if (signal_pending(current))
24 break; 25 break;
25 } 26 }
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
29{ 30{
30 wake_up_interruptible(PIPE_WAIT(*inode)); 31 wake_up_interruptible(&inode->i_pipe->wait);
31} 32}
32 33
33static int fifo_open(struct inode *inode, struct file *filp) 34static int fifo_open(struct inode *inode, struct file *filp)
34{ 35{
36 struct pipe_inode_info *pipe;
35 int ret; 37 int ret;
36 38
37 mutex_lock(PIPE_MUTEX(*inode)); 39 mutex_lock(&inode->i_mutex);
38 if (!inode->i_pipe) { 40 pipe = inode->i_pipe;
41 if (!pipe) {
39 ret = -ENOMEM; 42 ret = -ENOMEM;
40 if(!pipe_new(inode)) 43 pipe = alloc_pipe_info(inode);
44 if (!pipe)
41 goto err_nocleanup; 45 goto err_nocleanup;
46 inode->i_pipe = pipe;
42 } 47 }
43 filp->f_version = 0; 48 filp->f_version = 0;
44 49
@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp)
53 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
54 */ 59 */
55 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_fifo_fops;
56 PIPE_RCOUNTER(*inode)++; 61 pipe->r_counter++;
57 if (PIPE_READERS(*inode)++ == 0) 62 if (pipe->readers++ == 0)
58 wake_up_partner(inode); 63 wake_up_partner(inode);
59 64
60 if (!PIPE_WRITERS(*inode)) { 65 if (!pipe->writers) {
61 if ((filp->f_flags & O_NONBLOCK)) { 66 if ((filp->f_flags & O_NONBLOCK)) {
62 /* suppress POLLHUP until we have 67 /* suppress POLLHUP until we have
63 * seen a writer */ 68 * seen a writer */
64 filp->f_version = PIPE_WCOUNTER(*inode); 69 filp->f_version = pipe->w_counter;
65 } else 70 } else
66 { 71 {
67 wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); 72 wait_for_partner(inode, &pipe->w_counter);
68 if(signal_pending(current)) 73 if(signal_pending(current))
69 goto err_rd; 74 goto err_rd;
70 } 75 }
@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp)
78 * errno=ENXIO when there is no process reading the FIFO. 83 * errno=ENXIO when there is no process reading the FIFO.
79 */ 84 */
80 ret = -ENXIO; 85 ret = -ENXIO;
81 if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
82 goto err; 87 goto err;
83 88
84 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_fifo_fops;
85 PIPE_WCOUNTER(*inode)++; 90 pipe->w_counter++;
86 if (!PIPE_WRITERS(*inode)++) 91 if (!pipe->writers++)
87 wake_up_partner(inode); 92 wake_up_partner(inode);
88 93
89 if (!PIPE_READERS(*inode)) { 94 if (!pipe->readers) {
90 wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); 95 wait_for_partner(inode, &pipe->r_counter);
91 if (signal_pending(current)) 96 if (signal_pending(current))
92 goto err_wr; 97 goto err_wr;
93 } 98 }
@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
102 */ 107 */
103 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_fifo_fops;
104 109
105 PIPE_READERS(*inode)++; 110 pipe->readers++;
106 PIPE_WRITERS(*inode)++; 111 pipe->writers++;
107 PIPE_RCOUNTER(*inode)++; 112 pipe->r_counter++;
108 PIPE_WCOUNTER(*inode)++; 113 pipe->w_counter++;
109 if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) 114 if (pipe->readers == 1 || pipe->writers == 1)
110 wake_up_partner(inode); 115 wake_up_partner(inode);
111 break; 116 break;
112 117
@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp)
116 } 121 }
117 122
118 /* Ok! */ 123 /* Ok! */
119 mutex_unlock(PIPE_MUTEX(*inode)); 124 mutex_unlock(&inode->i_mutex);
120 return 0; 125 return 0;
121 126
122err_rd: 127err_rd:
123 if (!--PIPE_READERS(*inode)) 128 if (!--pipe->readers)
124 wake_up_interruptible(PIPE_WAIT(*inode)); 129 wake_up_interruptible(&pipe->wait);
125 ret = -ERESTARTSYS; 130 ret = -ERESTARTSYS;
126 goto err; 131 goto err;
127 132
128err_wr: 133err_wr:
129 if (!--PIPE_WRITERS(*inode)) 134 if (!--pipe->writers)
130 wake_up_interruptible(PIPE_WAIT(*inode)); 135 wake_up_interruptible(&pipe->wait);
131 ret = -ERESTARTSYS; 136 ret = -ERESTARTSYS;
132 goto err; 137 goto err;
133 138
134err: 139err:
135 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) 140 if (!pipe->readers && !pipe->writers)
136 free_pipe_info(inode); 141 free_pipe_info(inode);
137 142
138err_nocleanup: 143err_nocleanup:
139 mutex_unlock(PIPE_MUTEX(*inode)); 144 mutex_unlock(&inode->i_mutex);
140 return ret; 145 return ret;
141} 146}
142 147
diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c
index 76a0708ae978..049500847903 100644
--- a/fs/freevxfs/vxfs_olt.c
+++ b/fs/freevxfs/vxfs_olt.c
@@ -42,24 +42,21 @@
42static inline void 42static inline void
43vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp) 43vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
44{ 44{
45 if (infp->vsi_fshino) 45 BUG_ON(infp->vsi_fshino);
46 BUG();
47 infp->vsi_fshino = fshp->olt_fsino[0]; 46 infp->vsi_fshino = fshp->olt_fsino[0];
48} 47}
49 48
50static inline void 49static inline void
51vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) 50vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
52{ 51{
53 if (infp->vsi_iext) 52 BUG_ON(infp->vsi_iext);
54 BUG();
55 infp->vsi_iext = ilistp->olt_iext[0]; 53 infp->vsi_iext = ilistp->olt_iext[0];
56} 54}
57 55
58static inline u_long 56static inline u_long
59vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize) 57vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize)
60{ 58{
61 if (sbp->s_blocksize % bsize) 59 BUG_ON(sbp->s_blocksize % bsize);
62 BUG();
63 return (block * (sbp->s_blocksize / bsize)); 60 return (block * (sbp->s_blocksize / bsize));
64} 61}
65 62
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 23d1f52eb1b8..6c740f860665 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -23,13 +23,11 @@ static kmem_cache_t *fuse_req_cachep;
23 23
24static struct fuse_conn *fuse_get_conn(struct file *file) 24static struct fuse_conn *fuse_get_conn(struct file *file)
25{ 25{
26 struct fuse_conn *fc; 26 /*
27 spin_lock(&fuse_lock); 27 * Lockless access is OK, because file->private data is set
28 fc = file->private_data; 28 * once during mount and is valid until the file is released.
29 if (fc && !fc->connected) 29 */
30 fc = NULL; 30 return file->private_data;
31 spin_unlock(&fuse_lock);
32 return fc;
33} 31}
34 32
35static void fuse_request_init(struct fuse_req *req) 33static void fuse_request_init(struct fuse_req *req)
@@ -74,10 +72,8 @@ static void restore_sigs(sigset_t *oldset)
74 */ 72 */
75void fuse_reset_request(struct fuse_req *req) 73void fuse_reset_request(struct fuse_req *req)
76{ 74{
77 int preallocated = req->preallocated;
78 BUG_ON(atomic_read(&req->count) != 1); 75 BUG_ON(atomic_read(&req->count) != 1);
79 fuse_request_init(req); 76 fuse_request_init(req);
80 req->preallocated = preallocated;
81} 77}
82 78
83static void __fuse_get_request(struct fuse_req *req) 79static void __fuse_get_request(struct fuse_req *req)
@@ -92,80 +88,52 @@ static void __fuse_put_request(struct fuse_req *req)
92 atomic_dec(&req->count); 88 atomic_dec(&req->count);
93} 89}
94 90
95static struct fuse_req *do_get_request(struct fuse_conn *fc) 91struct fuse_req *fuse_get_req(struct fuse_conn *fc)
96{ 92{
97 struct fuse_req *req; 93 struct fuse_req *req;
98
99 spin_lock(&fuse_lock);
100 BUG_ON(list_empty(&fc->unused_list));
101 req = list_entry(fc->unused_list.next, struct fuse_req, list);
102 list_del_init(&req->list);
103 spin_unlock(&fuse_lock);
104 fuse_request_init(req);
105 req->preallocated = 1;
106 req->in.h.uid = current->fsuid;
107 req->in.h.gid = current->fsgid;
108 req->in.h.pid = current->pid;
109 return req;
110}
111
112/* This can return NULL, but only in case it's interrupted by a SIGKILL */
113struct fuse_req *fuse_get_request(struct fuse_conn *fc)
114{
115 int intr;
116 sigset_t oldset; 94 sigset_t oldset;
95 int err;
117 96
118 atomic_inc(&fc->num_waiting);
119 block_sigs(&oldset); 97 block_sigs(&oldset);
120 intr = down_interruptible(&fc->outstanding_sem); 98 err = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
121 restore_sigs(&oldset); 99 restore_sigs(&oldset);
122 if (intr) { 100 if (err)
123 atomic_dec(&fc->num_waiting); 101 return ERR_PTR(-EINTR);
124 return NULL;
125 }
126 return do_get_request(fc);
127}
128 102
129/* Must be called with fuse_lock held */ 103 req = fuse_request_alloc();
130static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) 104 if (!req)
131{ 105 return ERR_PTR(-ENOMEM);
132 if (req->preallocated) {
133 atomic_dec(&fc->num_waiting);
134 list_add(&req->list, &fc->unused_list);
135 } else
136 fuse_request_free(req);
137 106
138 /* If we are in debt decrease that first */ 107 atomic_inc(&fc->num_waiting);
139 if (fc->outstanding_debt) 108 fuse_request_init(req);
140 fc->outstanding_debt--; 109 req->in.h.uid = current->fsuid;
141 else 110 req->in.h.gid = current->fsgid;
142 up(&fc->outstanding_sem); 111 req->in.h.pid = current->pid;
112 return req;
143} 113}
144 114
145void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 115void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
146{ 116{
147 if (atomic_dec_and_test(&req->count)) { 117 if (atomic_dec_and_test(&req->count)) {
148 spin_lock(&fuse_lock); 118 atomic_dec(&fc->num_waiting);
149 fuse_putback_request(fc, req); 119 fuse_request_free(req);
150 spin_unlock(&fuse_lock);
151 } 120 }
152} 121}
153 122
154static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req) 123void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
155{
156 if (atomic_dec_and_test(&req->count))
157 fuse_putback_request(fc, req);
158}
159
160void fuse_release_background(struct fuse_req *req)
161{ 124{
162 iput(req->inode); 125 iput(req->inode);
163 iput(req->inode2); 126 iput(req->inode2);
164 if (req->file) 127 if (req->file)
165 fput(req->file); 128 fput(req->file);
166 spin_lock(&fuse_lock); 129 spin_lock(&fc->lock);
167 list_del(&req->bg_entry); 130 list_del(&req->bg_entry);
168 spin_unlock(&fuse_lock); 131 if (fc->num_background == FUSE_MAX_BACKGROUND) {
132 fc->blocked = 0;
133 wake_up_all(&fc->blocked_waitq);
134 }
135 fc->num_background--;
136 spin_unlock(&fc->lock);
169} 137}
170 138
171/* 139/*
@@ -184,23 +152,23 @@ void fuse_release_background(struct fuse_req *req)
184 * interrupted and put in the background, it will return with an error 152 * interrupted and put in the background, it will return with an error
185 * and hence never be reset and reused. 153 * and hence never be reset and reused.
186 * 154 *
187 * Called with fuse_lock, unlocks it 155 * Called with fc->lock, unlocks it
188 */ 156 */
189static void request_end(struct fuse_conn *fc, struct fuse_req *req) 157static void request_end(struct fuse_conn *fc, struct fuse_req *req)
190{ 158{
191 list_del(&req->list); 159 list_del(&req->list);
192 req->state = FUSE_REQ_FINISHED; 160 req->state = FUSE_REQ_FINISHED;
193 if (!req->background) { 161 if (!req->background) {
162 spin_unlock(&fc->lock);
194 wake_up(&req->waitq); 163 wake_up(&req->waitq);
195 fuse_put_request_locked(fc, req); 164 fuse_put_request(fc, req);
196 spin_unlock(&fuse_lock);
197 } else { 165 } else {
198 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 166 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
199 req->end = NULL; 167 req->end = NULL;
200 spin_unlock(&fuse_lock); 168 spin_unlock(&fc->lock);
201 down_read(&fc->sbput_sem); 169 down_read(&fc->sbput_sem);
202 if (fc->mounted) 170 if (fc->mounted)
203 fuse_release_background(req); 171 fuse_release_background(fc, req);
204 up_read(&fc->sbput_sem); 172 up_read(&fc->sbput_sem);
205 if (end) 173 if (end)
206 end(fc, req); 174 end(fc, req);
@@ -242,6 +210,9 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
242{ 210{
243 req->background = 1; 211 req->background = 1;
244 list_add(&req->bg_entry, &fc->background); 212 list_add(&req->bg_entry, &fc->background);
213 fc->num_background++;
214 if (fc->num_background == FUSE_MAX_BACKGROUND)
215 fc->blocked = 1;
245 if (req->inode) 216 if (req->inode)
246 req->inode = igrab(req->inode); 217 req->inode = igrab(req->inode);
247 if (req->inode2) 218 if (req->inode2)
@@ -250,16 +221,16 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
250 get_file(req->file); 221 get_file(req->file);
251} 222}
252 223
253/* Called with fuse_lock held. Releases, and then reacquires it. */ 224/* Called with fc->lock held. Releases, and then reacquires it. */
254static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 225static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
255{ 226{
256 sigset_t oldset; 227 sigset_t oldset;
257 228
258 spin_unlock(&fuse_lock); 229 spin_unlock(&fc->lock);
259 block_sigs(&oldset); 230 block_sigs(&oldset);
260 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); 231 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
261 restore_sigs(&oldset); 232 restore_sigs(&oldset);
262 spin_lock(&fuse_lock); 233 spin_lock(&fc->lock);
263 if (req->state == FUSE_REQ_FINISHED && !req->interrupted) 234 if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
264 return; 235 return;
265 236
@@ -273,9 +244,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
273 locked state, there mustn't be any filesystem 244 locked state, there mustn't be any filesystem
274 operation (e.g. page fault), since that could lead 245 operation (e.g. page fault), since that could lead
275 to deadlock */ 246 to deadlock */
276 spin_unlock(&fuse_lock); 247 spin_unlock(&fc->lock);
277 wait_event(req->waitq, !req->locked); 248 wait_event(req->waitq, !req->locked);
278 spin_lock(&fuse_lock); 249 spin_lock(&fc->lock);
279 } 250 }
280 if (req->state == FUSE_REQ_PENDING) { 251 if (req->state == FUSE_REQ_PENDING) {
281 list_del(&req->list); 252 list_del(&req->list);
@@ -304,19 +275,10 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
304 req->in.h.unique = fc->reqctr; 275 req->in.h.unique = fc->reqctr;
305 req->in.h.len = sizeof(struct fuse_in_header) + 276 req->in.h.len = sizeof(struct fuse_in_header) +
306 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 277 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
307 if (!req->preallocated) {
308 /* If request is not preallocated (either FORGET or
309 RELEASE), then still decrease outstanding_sem, so
310 user can't open infinite number of files while not
311 processing the RELEASE requests. However for
312 efficiency do it without blocking, so if down()
313 would block, just increase the debt instead */
314 if (down_trylock(&fc->outstanding_sem))
315 fc->outstanding_debt++;
316 }
317 list_add_tail(&req->list, &fc->pending); 278 list_add_tail(&req->list, &fc->pending);
318 req->state = FUSE_REQ_PENDING; 279 req->state = FUSE_REQ_PENDING;
319 wake_up(&fc->waitq); 280 wake_up(&fc->waitq);
281 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
320} 282}
321 283
322/* 284/*
@@ -325,7 +287,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
325void request_send(struct fuse_conn *fc, struct fuse_req *req) 287void request_send(struct fuse_conn *fc, struct fuse_req *req)
326{ 288{
327 req->isreply = 1; 289 req->isreply = 1;
328 spin_lock(&fuse_lock); 290 spin_lock(&fc->lock);
329 if (!fc->connected) 291 if (!fc->connected)
330 req->out.h.error = -ENOTCONN; 292 req->out.h.error = -ENOTCONN;
331 else if (fc->conn_error) 293 else if (fc->conn_error)
@@ -338,15 +300,16 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
338 300
339 request_wait_answer(fc, req); 301 request_wait_answer(fc, req);
340 } 302 }
341 spin_unlock(&fuse_lock); 303 spin_unlock(&fc->lock);
342} 304}
343 305
344static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) 306static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
345{ 307{
346 spin_lock(&fuse_lock); 308 spin_lock(&fc->lock);
309 background_request(fc, req);
347 if (fc->connected) { 310 if (fc->connected) {
348 queue_request(fc, req); 311 queue_request(fc, req);
349 spin_unlock(&fuse_lock); 312 spin_unlock(&fc->lock);
350 } else { 313 } else {
351 req->out.h.error = -ENOTCONN; 314 req->out.h.error = -ENOTCONN;
352 request_end(fc, req); 315 request_end(fc, req);
@@ -362,9 +325,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
362void request_send_background(struct fuse_conn *fc, struct fuse_req *req) 325void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
363{ 326{
364 req->isreply = 1; 327 req->isreply = 1;
365 spin_lock(&fuse_lock);
366 background_request(fc, req);
367 spin_unlock(&fuse_lock);
368 request_send_nowait(fc, req); 328 request_send_nowait(fc, req);
369} 329}
370 330
@@ -373,16 +333,16 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
373 * anything that could cause a page-fault. If the request was already 333 * anything that could cause a page-fault. If the request was already
374 * interrupted bail out. 334 * interrupted bail out.
375 */ 335 */
376static int lock_request(struct fuse_req *req) 336static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
377{ 337{
378 int err = 0; 338 int err = 0;
379 if (req) { 339 if (req) {
380 spin_lock(&fuse_lock); 340 spin_lock(&fc->lock);
381 if (req->interrupted) 341 if (req->interrupted)
382 err = -ENOENT; 342 err = -ENOENT;
383 else 343 else
384 req->locked = 1; 344 req->locked = 1;
385 spin_unlock(&fuse_lock); 345 spin_unlock(&fc->lock);
386 } 346 }
387 return err; 347 return err;
388} 348}
@@ -392,18 +352,19 @@ static int lock_request(struct fuse_req *req)
392 * requester thread is currently waiting for it to be unlocked, so 352 * requester thread is currently waiting for it to be unlocked, so
393 * wake it up. 353 * wake it up.
394 */ 354 */
395static void unlock_request(struct fuse_req *req) 355static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
396{ 356{
397 if (req) { 357 if (req) {
398 spin_lock(&fuse_lock); 358 spin_lock(&fc->lock);
399 req->locked = 0; 359 req->locked = 0;
400 if (req->interrupted) 360 if (req->interrupted)
401 wake_up(&req->waitq); 361 wake_up(&req->waitq);
402 spin_unlock(&fuse_lock); 362 spin_unlock(&fc->lock);
403 } 363 }
404} 364}
405 365
406struct fuse_copy_state { 366struct fuse_copy_state {
367 struct fuse_conn *fc;
407 int write; 368 int write;
408 struct fuse_req *req; 369 struct fuse_req *req;
409 const struct iovec *iov; 370 const struct iovec *iov;
@@ -416,11 +377,12 @@ struct fuse_copy_state {
416 unsigned len; 377 unsigned len;
417}; 378};
418 379
419static void fuse_copy_init(struct fuse_copy_state *cs, int write, 380static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
420 struct fuse_req *req, const struct iovec *iov, 381 int write, struct fuse_req *req,
421 unsigned long nr_segs) 382 const struct iovec *iov, unsigned long nr_segs)
422{ 383{
423 memset(cs, 0, sizeof(*cs)); 384 memset(cs, 0, sizeof(*cs));
385 cs->fc = fc;
424 cs->write = write; 386 cs->write = write;
425 cs->req = req; 387 cs->req = req;
426 cs->iov = iov; 388 cs->iov = iov;
@@ -450,7 +412,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
450 unsigned long offset; 412 unsigned long offset;
451 int err; 413 int err;
452 414
453 unlock_request(cs->req); 415 unlock_request(cs->fc, cs->req);
454 fuse_copy_finish(cs); 416 fuse_copy_finish(cs);
455 if (!cs->seglen) { 417 if (!cs->seglen) {
456 BUG_ON(!cs->nr_segs); 418 BUG_ON(!cs->nr_segs);
@@ -473,7 +435,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
473 cs->seglen -= cs->len; 435 cs->seglen -= cs->len;
474 cs->addr += cs->len; 436 cs->addr += cs->len;
475 437
476 return lock_request(cs->req); 438 return lock_request(cs->fc, cs->req);
477} 439}
478 440
479/* Do as much copy to/from userspace buffer as we can */ 441/* Do as much copy to/from userspace buffer as we can */
@@ -585,9 +547,9 @@ static void request_wait(struct fuse_conn *fc)
585 if (signal_pending(current)) 547 if (signal_pending(current))
586 break; 548 break;
587 549
588 spin_unlock(&fuse_lock); 550 spin_unlock(&fc->lock);
589 schedule(); 551 schedule();
590 spin_lock(&fuse_lock); 552 spin_lock(&fc->lock);
591 } 553 }
592 set_current_state(TASK_RUNNING); 554 set_current_state(TASK_RUNNING);
593 remove_wait_queue(&fc->waitq, &wait); 555 remove_wait_queue(&fc->waitq, &wait);
@@ -606,18 +568,21 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
606 unsigned long nr_segs, loff_t *off) 568 unsigned long nr_segs, loff_t *off)
607{ 569{
608 int err; 570 int err;
609 struct fuse_conn *fc;
610 struct fuse_req *req; 571 struct fuse_req *req;
611 struct fuse_in *in; 572 struct fuse_in *in;
612 struct fuse_copy_state cs; 573 struct fuse_copy_state cs;
613 unsigned reqsize; 574 unsigned reqsize;
575 struct fuse_conn *fc = fuse_get_conn(file);
576 if (!fc)
577 return -EPERM;
614 578
615 restart: 579 restart:
616 spin_lock(&fuse_lock); 580 spin_lock(&fc->lock);
617 fc = file->private_data; 581 err = -EAGAIN;
618 err = -EPERM; 582 if ((file->f_flags & O_NONBLOCK) && fc->connected &&
619 if (!fc) 583 list_empty(&fc->pending))
620 goto err_unlock; 584 goto err_unlock;
585
621 request_wait(fc); 586 request_wait(fc);
622 err = -ENODEV; 587 err = -ENODEV;
623 if (!fc->connected) 588 if (!fc->connected)
@@ -641,14 +606,14 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
641 request_end(fc, req); 606 request_end(fc, req);
642 goto restart; 607 goto restart;
643 } 608 }
644 spin_unlock(&fuse_lock); 609 spin_unlock(&fc->lock);
645 fuse_copy_init(&cs, 1, req, iov, nr_segs); 610 fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
646 err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); 611 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
647 if (!err) 612 if (!err)
648 err = fuse_copy_args(&cs, in->numargs, in->argpages, 613 err = fuse_copy_args(&cs, in->numargs, in->argpages,
649 (struct fuse_arg *) in->args, 0); 614 (struct fuse_arg *) in->args, 0);
650 fuse_copy_finish(&cs); 615 fuse_copy_finish(&cs);
651 spin_lock(&fuse_lock); 616 spin_lock(&fc->lock);
652 req->locked = 0; 617 req->locked = 0;
653 if (!err && req->interrupted) 618 if (!err && req->interrupted)
654 err = -ENOENT; 619 err = -ENOENT;
@@ -663,12 +628,12 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
663 else { 628 else {
664 req->state = FUSE_REQ_SENT; 629 req->state = FUSE_REQ_SENT;
665 list_move_tail(&req->list, &fc->processing); 630 list_move_tail(&req->list, &fc->processing);
666 spin_unlock(&fuse_lock); 631 spin_unlock(&fc->lock);
667 } 632 }
668 return reqsize; 633 return reqsize;
669 634
670 err_unlock: 635 err_unlock:
671 spin_unlock(&fuse_lock); 636 spin_unlock(&fc->lock);
672 return err; 637 return err;
673} 638}
674 639
@@ -735,9 +700,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
735 struct fuse_copy_state cs; 700 struct fuse_copy_state cs;
736 struct fuse_conn *fc = fuse_get_conn(file); 701 struct fuse_conn *fc = fuse_get_conn(file);
737 if (!fc) 702 if (!fc)
738 return -ENODEV; 703 return -EPERM;
739 704
740 fuse_copy_init(&cs, 0, NULL, iov, nr_segs); 705 fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
741 if (nbytes < sizeof(struct fuse_out_header)) 706 if (nbytes < sizeof(struct fuse_out_header))
742 return -EINVAL; 707 return -EINVAL;
743 708
@@ -749,7 +714,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
749 oh.len != nbytes) 714 oh.len != nbytes)
750 goto err_finish; 715 goto err_finish;
751 716
752 spin_lock(&fuse_lock); 717 spin_lock(&fc->lock);
753 err = -ENOENT; 718 err = -ENOENT;
754 if (!fc->connected) 719 if (!fc->connected)
755 goto err_unlock; 720 goto err_unlock;
@@ -760,9 +725,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
760 goto err_unlock; 725 goto err_unlock;
761 726
762 if (req->interrupted) { 727 if (req->interrupted) {
763 spin_unlock(&fuse_lock); 728 spin_unlock(&fc->lock);
764 fuse_copy_finish(&cs); 729 fuse_copy_finish(&cs);
765 spin_lock(&fuse_lock); 730 spin_lock(&fc->lock);
766 request_end(fc, req); 731 request_end(fc, req);
767 return -ENOENT; 732 return -ENOENT;
768 } 733 }
@@ -770,12 +735,12 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
770 req->out.h = oh; 735 req->out.h = oh;
771 req->locked = 1; 736 req->locked = 1;
772 cs.req = req; 737 cs.req = req;
773 spin_unlock(&fuse_lock); 738 spin_unlock(&fc->lock);
774 739
775 err = copy_out_args(&cs, &req->out, nbytes); 740 err = copy_out_args(&cs, &req->out, nbytes);
776 fuse_copy_finish(&cs); 741 fuse_copy_finish(&cs);
777 742
778 spin_lock(&fuse_lock); 743 spin_lock(&fc->lock);
779 req->locked = 0; 744 req->locked = 0;
780 if (!err) { 745 if (!err) {
781 if (req->interrupted) 746 if (req->interrupted)
@@ -787,7 +752,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
787 return err ? err : nbytes; 752 return err ? err : nbytes;
788 753
789 err_unlock: 754 err_unlock:
790 spin_unlock(&fuse_lock); 755 spin_unlock(&fc->lock);
791 err_finish: 756 err_finish:
792 fuse_copy_finish(&cs); 757 fuse_copy_finish(&cs);
793 return err; 758 return err;
@@ -804,18 +769,19 @@ static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
804 769
805static unsigned fuse_dev_poll(struct file *file, poll_table *wait) 770static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
806{ 771{
807 struct fuse_conn *fc = fuse_get_conn(file);
808 unsigned mask = POLLOUT | POLLWRNORM; 772 unsigned mask = POLLOUT | POLLWRNORM;
809 773 struct fuse_conn *fc = fuse_get_conn(file);
810 if (!fc) 774 if (!fc)
811 return -ENODEV; 775 return POLLERR;
812 776
813 poll_wait(file, &fc->waitq, wait); 777 poll_wait(file, &fc->waitq, wait);
814 778
815 spin_lock(&fuse_lock); 779 spin_lock(&fc->lock);
816 if (!list_empty(&fc->pending)) 780 if (!fc->connected)
817 mask |= POLLIN | POLLRDNORM; 781 mask = POLLERR;
818 spin_unlock(&fuse_lock); 782 else if (!list_empty(&fc->pending))
783 mask |= POLLIN | POLLRDNORM;
784 spin_unlock(&fc->lock);
819 785
820 return mask; 786 return mask;
821} 787}
@@ -823,7 +789,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
823/* 789/*
824 * Abort all requests on the given list (pending or processing) 790 * Abort all requests on the given list (pending or processing)
825 * 791 *
826 * This function releases and reacquires fuse_lock 792 * This function releases and reacquires fc->lock
827 */ 793 */
828static void end_requests(struct fuse_conn *fc, struct list_head *head) 794static void end_requests(struct fuse_conn *fc, struct list_head *head)
829{ 795{
@@ -832,7 +798,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
832 req = list_entry(head->next, struct fuse_req, list); 798 req = list_entry(head->next, struct fuse_req, list);
833 req->out.h.error = -ECONNABORTED; 799 req->out.h.error = -ECONNABORTED;
834 request_end(fc, req); 800 request_end(fc, req);
835 spin_lock(&fuse_lock); 801 spin_lock(&fc->lock);
836 } 802 }
837} 803}
838 804
@@ -863,10 +829,10 @@ static void end_io_requests(struct fuse_conn *fc)
863 req->end = NULL; 829 req->end = NULL;
864 /* The end function will consume this reference */ 830 /* The end function will consume this reference */
865 __fuse_get_request(req); 831 __fuse_get_request(req);
866 spin_unlock(&fuse_lock); 832 spin_unlock(&fc->lock);
867 wait_event(req->waitq, !req->locked); 833 wait_event(req->waitq, !req->locked);
868 end(fc, req); 834 end(fc, req);
869 spin_lock(&fuse_lock); 835 spin_lock(&fc->lock);
870 } 836 }
871 } 837 }
872} 838}
@@ -893,35 +859,44 @@ static void end_io_requests(struct fuse_conn *fc)
893 */ 859 */
894void fuse_abort_conn(struct fuse_conn *fc) 860void fuse_abort_conn(struct fuse_conn *fc)
895{ 861{
896 spin_lock(&fuse_lock); 862 spin_lock(&fc->lock);
897 if (fc->connected) { 863 if (fc->connected) {
898 fc->connected = 0; 864 fc->connected = 0;
899 end_io_requests(fc); 865 end_io_requests(fc);
900 end_requests(fc, &fc->pending); 866 end_requests(fc, &fc->pending);
901 end_requests(fc, &fc->processing); 867 end_requests(fc, &fc->processing);
902 wake_up_all(&fc->waitq); 868 wake_up_all(&fc->waitq);
869 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
903 } 870 }
904 spin_unlock(&fuse_lock); 871 spin_unlock(&fc->lock);
905} 872}
906 873
907static int fuse_dev_release(struct inode *inode, struct file *file) 874static int fuse_dev_release(struct inode *inode, struct file *file)
908{ 875{
909 struct fuse_conn *fc; 876 struct fuse_conn *fc = fuse_get_conn(file);
910
911 spin_lock(&fuse_lock);
912 fc = file->private_data;
913 if (fc) { 877 if (fc) {
878 spin_lock(&fc->lock);
914 fc->connected = 0; 879 fc->connected = 0;
915 end_requests(fc, &fc->pending); 880 end_requests(fc, &fc->pending);
916 end_requests(fc, &fc->processing); 881 end_requests(fc, &fc->processing);
917 } 882 spin_unlock(&fc->lock);
918 spin_unlock(&fuse_lock); 883 fasync_helper(-1, file, 0, &fc->fasync);
919 if (fc)
920 kobject_put(&fc->kobj); 884 kobject_put(&fc->kobj);
885 }
921 886
922 return 0; 887 return 0;
923} 888}
924 889
890static int fuse_dev_fasync(int fd, struct file *file, int on)
891{
892 struct fuse_conn *fc = fuse_get_conn(file);
893 if (!fc)
894 return -EPERM;
895
896 /* No locking - fasync_helper does its own locking */
897 return fasync_helper(fd, file, on, &fc->fasync);
898}
899
925const struct file_operations fuse_dev_operations = { 900const struct file_operations fuse_dev_operations = {
926 .owner = THIS_MODULE, 901 .owner = THIS_MODULE,
927 .llseek = no_llseek, 902 .llseek = no_llseek,
@@ -931,6 +906,7 @@ const struct file_operations fuse_dev_operations = {
931 .writev = fuse_dev_writev, 906 .writev = fuse_dev_writev,
932 .poll = fuse_dev_poll, 907 .poll = fuse_dev_poll,
933 .release = fuse_dev_release, 908 .release = fuse_dev_release,
909 .fasync = fuse_dev_fasync,
934}; 910};
935 911
936static struct miscdevice fuse_miscdevice = { 912static struct miscdevice fuse_miscdevice = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 256355b80256..8d7546e832e8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -117,8 +117,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
117 return 0; 117 return 0;
118 118
119 fc = get_fuse_conn(inode); 119 fc = get_fuse_conn(inode);
120 req = fuse_get_request(fc); 120 req = fuse_get_req(fc);
121 if (!req) 121 if (IS_ERR(req))
122 return 0; 122 return 0;
123 123
124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); 124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
@@ -188,9 +188,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
188 if (entry->d_name.len > FUSE_NAME_MAX) 188 if (entry->d_name.len > FUSE_NAME_MAX)
189 return ERR_PTR(-ENAMETOOLONG); 189 return ERR_PTR(-ENAMETOOLONG);
190 190
191 req = fuse_get_request(fc); 191 req = fuse_get_req(fc);
192 if (!req) 192 if (IS_ERR(req))
193 return ERR_PTR(-EINTR); 193 return ERR_PTR(PTR_ERR(req));
194 194
195 fuse_lookup_init(req, dir, entry, &outarg); 195 fuse_lookup_init(req, dir, entry, &outarg);
196 request_send(fc, req); 196 request_send(fc, req);
@@ -244,15 +244,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
244 struct file *file; 244 struct file *file;
245 int flags = nd->intent.open.flags - 1; 245 int flags = nd->intent.open.flags - 1;
246 246
247 err = -ENOSYS;
248 if (fc->no_create) 247 if (fc->no_create)
249 goto out; 248 return -ENOSYS;
250 249
251 err = -EINTR; 250 req = fuse_get_req(fc);
252 req = fuse_get_request(fc); 251 if (IS_ERR(req))
253 if (!req) 252 return PTR_ERR(req);
254 goto out;
255 253
254 err = -ENOMEM;
256 ff = fuse_file_alloc(); 255 ff = fuse_file_alloc();
257 if (!ff) 256 if (!ff)
258 goto out_put_request; 257 goto out_put_request;
@@ -314,7 +313,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
314 fuse_file_free(ff); 313 fuse_file_free(ff);
315 out_put_request: 314 out_put_request:
316 fuse_put_request(fc, req); 315 fuse_put_request(fc, req);
317 out:
318 return err; 316 return err;
319} 317}
320 318
@@ -375,9 +373,9 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
375{ 373{
376 struct fuse_mknod_in inarg; 374 struct fuse_mknod_in inarg;
377 struct fuse_conn *fc = get_fuse_conn(dir); 375 struct fuse_conn *fc = get_fuse_conn(dir);
378 struct fuse_req *req = fuse_get_request(fc); 376 struct fuse_req *req = fuse_get_req(fc);
379 if (!req) 377 if (IS_ERR(req))
380 return -EINTR; 378 return PTR_ERR(req);
381 379
382 memset(&inarg, 0, sizeof(inarg)); 380 memset(&inarg, 0, sizeof(inarg));
383 inarg.mode = mode; 381 inarg.mode = mode;
@@ -407,9 +405,9 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
407{ 405{
408 struct fuse_mkdir_in inarg; 406 struct fuse_mkdir_in inarg;
409 struct fuse_conn *fc = get_fuse_conn(dir); 407 struct fuse_conn *fc = get_fuse_conn(dir);
410 struct fuse_req *req = fuse_get_request(fc); 408 struct fuse_req *req = fuse_get_req(fc);
411 if (!req) 409 if (IS_ERR(req))
412 return -EINTR; 410 return PTR_ERR(req);
413 411
414 memset(&inarg, 0, sizeof(inarg)); 412 memset(&inarg, 0, sizeof(inarg));
415 inarg.mode = mode; 413 inarg.mode = mode;
@@ -427,9 +425,9 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
427{ 425{
428 struct fuse_conn *fc = get_fuse_conn(dir); 426 struct fuse_conn *fc = get_fuse_conn(dir);
429 unsigned len = strlen(link) + 1; 427 unsigned len = strlen(link) + 1;
430 struct fuse_req *req = fuse_get_request(fc); 428 struct fuse_req *req = fuse_get_req(fc);
431 if (!req) 429 if (IS_ERR(req))
432 return -EINTR; 430 return PTR_ERR(req);
433 431
434 req->in.h.opcode = FUSE_SYMLINK; 432 req->in.h.opcode = FUSE_SYMLINK;
435 req->in.numargs = 2; 433 req->in.numargs = 2;
@@ -444,9 +442,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
444{ 442{
445 int err; 443 int err;
446 struct fuse_conn *fc = get_fuse_conn(dir); 444 struct fuse_conn *fc = get_fuse_conn(dir);
447 struct fuse_req *req = fuse_get_request(fc); 445 struct fuse_req *req = fuse_get_req(fc);
448 if (!req) 446 if (IS_ERR(req))
449 return -EINTR; 447 return PTR_ERR(req);
450 448
451 req->in.h.opcode = FUSE_UNLINK; 449 req->in.h.opcode = FUSE_UNLINK;
452 req->in.h.nodeid = get_node_id(dir); 450 req->in.h.nodeid = get_node_id(dir);
@@ -476,9 +474,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
476{ 474{
477 int err; 475 int err;
478 struct fuse_conn *fc = get_fuse_conn(dir); 476 struct fuse_conn *fc = get_fuse_conn(dir);
479 struct fuse_req *req = fuse_get_request(fc); 477 struct fuse_req *req = fuse_get_req(fc);
480 if (!req) 478 if (IS_ERR(req))
481 return -EINTR; 479 return PTR_ERR(req);
482 480
483 req->in.h.opcode = FUSE_RMDIR; 481 req->in.h.opcode = FUSE_RMDIR;
484 req->in.h.nodeid = get_node_id(dir); 482 req->in.h.nodeid = get_node_id(dir);
@@ -504,9 +502,9 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
504 int err; 502 int err;
505 struct fuse_rename_in inarg; 503 struct fuse_rename_in inarg;
506 struct fuse_conn *fc = get_fuse_conn(olddir); 504 struct fuse_conn *fc = get_fuse_conn(olddir);
507 struct fuse_req *req = fuse_get_request(fc); 505 struct fuse_req *req = fuse_get_req(fc);
508 if (!req) 506 if (IS_ERR(req))
509 return -EINTR; 507 return PTR_ERR(req);
510 508
511 memset(&inarg, 0, sizeof(inarg)); 509 memset(&inarg, 0, sizeof(inarg));
512 inarg.newdir = get_node_id(newdir); 510 inarg.newdir = get_node_id(newdir);
@@ -553,9 +551,9 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
553 struct fuse_link_in inarg; 551 struct fuse_link_in inarg;
554 struct inode *inode = entry->d_inode; 552 struct inode *inode = entry->d_inode;
555 struct fuse_conn *fc = get_fuse_conn(inode); 553 struct fuse_conn *fc = get_fuse_conn(inode);
556 struct fuse_req *req = fuse_get_request(fc); 554 struct fuse_req *req = fuse_get_req(fc);
557 if (!req) 555 if (IS_ERR(req))
558 return -EINTR; 556 return PTR_ERR(req);
559 557
560 memset(&inarg, 0, sizeof(inarg)); 558 memset(&inarg, 0, sizeof(inarg));
561 inarg.oldnodeid = get_node_id(inode); 559 inarg.oldnodeid = get_node_id(inode);
@@ -583,9 +581,9 @@ int fuse_do_getattr(struct inode *inode)
583 int err; 581 int err;
584 struct fuse_attr_out arg; 582 struct fuse_attr_out arg;
585 struct fuse_conn *fc = get_fuse_conn(inode); 583 struct fuse_conn *fc = get_fuse_conn(inode);
586 struct fuse_req *req = fuse_get_request(fc); 584 struct fuse_req *req = fuse_get_req(fc);
587 if (!req) 585 if (IS_ERR(req))
588 return -EINTR; 586 return PTR_ERR(req);
589 587
590 req->in.h.opcode = FUSE_GETATTR; 588 req->in.h.opcode = FUSE_GETATTR;
591 req->in.h.nodeid = get_node_id(inode); 589 req->in.h.nodeid = get_node_id(inode);
@@ -673,9 +671,9 @@ static int fuse_access(struct inode *inode, int mask)
673 if (fc->no_access) 671 if (fc->no_access)
674 return 0; 672 return 0;
675 673
676 req = fuse_get_request(fc); 674 req = fuse_get_req(fc);
677 if (!req) 675 if (IS_ERR(req))
678 return -EINTR; 676 return PTR_ERR(req);
679 677
680 memset(&inarg, 0, sizeof(inarg)); 678 memset(&inarg, 0, sizeof(inarg));
681 inarg.mask = mask; 679 inarg.mask = mask;
@@ -780,9 +778,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
780 if (is_bad_inode(inode)) 778 if (is_bad_inode(inode))
781 return -EIO; 779 return -EIO;
782 780
783 req = fuse_get_request(fc); 781 req = fuse_get_req(fc);
784 if (!req) 782 if (IS_ERR(req))
785 return -EINTR; 783 return PTR_ERR(req);
786 784
787 page = alloc_page(GFP_KERNEL); 785 page = alloc_page(GFP_KERNEL);
788 if (!page) { 786 if (!page) {
@@ -809,11 +807,11 @@ static char *read_link(struct dentry *dentry)
809{ 807{
810 struct inode *inode = dentry->d_inode; 808 struct inode *inode = dentry->d_inode;
811 struct fuse_conn *fc = get_fuse_conn(inode); 809 struct fuse_conn *fc = get_fuse_conn(inode);
812 struct fuse_req *req = fuse_get_request(fc); 810 struct fuse_req *req = fuse_get_req(fc);
813 char *link; 811 char *link;
814 812
815 if (!req) 813 if (IS_ERR(req))
816 return ERR_PTR(-EINTR); 814 return ERR_PTR(PTR_ERR(req));
817 815
818 link = (char *) __get_free_page(GFP_KERNEL); 816 link = (char *) __get_free_page(GFP_KERNEL);
819 if (!link) { 817 if (!link) {
@@ -933,9 +931,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
933 } 931 }
934 } 932 }
935 933
936 req = fuse_get_request(fc); 934 req = fuse_get_req(fc);
937 if (!req) 935 if (IS_ERR(req))
938 return -EINTR; 936 return PTR_ERR(req);
939 937
940 memset(&inarg, 0, sizeof(inarg)); 938 memset(&inarg, 0, sizeof(inarg));
941 iattr_to_fattr(attr, &inarg); 939 iattr_to_fattr(attr, &inarg);
@@ -995,9 +993,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
995 if (fc->no_setxattr) 993 if (fc->no_setxattr)
996 return -EOPNOTSUPP; 994 return -EOPNOTSUPP;
997 995
998 req = fuse_get_request(fc); 996 req = fuse_get_req(fc);
999 if (!req) 997 if (IS_ERR(req))
1000 return -EINTR; 998 return PTR_ERR(req);
1001 999
1002 memset(&inarg, 0, sizeof(inarg)); 1000 memset(&inarg, 0, sizeof(inarg));
1003 inarg.size = size; 1001 inarg.size = size;
@@ -1035,9 +1033,9 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1035 if (fc->no_getxattr) 1033 if (fc->no_getxattr)
1036 return -EOPNOTSUPP; 1034 return -EOPNOTSUPP;
1037 1035
1038 req = fuse_get_request(fc); 1036 req = fuse_get_req(fc);
1039 if (!req) 1037 if (IS_ERR(req))
1040 return -EINTR; 1038 return PTR_ERR(req);
1041 1039
1042 memset(&inarg, 0, sizeof(inarg)); 1040 memset(&inarg, 0, sizeof(inarg));
1043 inarg.size = size; 1041 inarg.size = size;
@@ -1085,9 +1083,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1085 if (fc->no_listxattr) 1083 if (fc->no_listxattr)
1086 return -EOPNOTSUPP; 1084 return -EOPNOTSUPP;
1087 1085
1088 req = fuse_get_request(fc); 1086 req = fuse_get_req(fc);
1089 if (!req) 1087 if (IS_ERR(req))
1090 return -EINTR; 1088 return PTR_ERR(req);
1091 1089
1092 memset(&inarg, 0, sizeof(inarg)); 1090 memset(&inarg, 0, sizeof(inarg));
1093 inarg.size = size; 1091 inarg.size = size;
@@ -1131,9 +1129,9 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1131 if (fc->no_removexattr) 1129 if (fc->no_removexattr)
1132 return -EOPNOTSUPP; 1130 return -EOPNOTSUPP;
1133 1131
1134 req = fuse_get_request(fc); 1132 req = fuse_get_req(fc);
1135 if (!req) 1133 if (IS_ERR(req))
1136 return -EINTR; 1134 return PTR_ERR(req);
1137 1135
1138 req->in.h.opcode = FUSE_REMOVEXATTR; 1136 req->in.h.opcode = FUSE_REMOVEXATTR;
1139 req->in.h.nodeid = get_node_id(inode); 1137 req->in.h.nodeid = get_node_id(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 975f2697e866..e4f041a11bb5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -22,9 +22,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
22 struct fuse_req *req; 22 struct fuse_req *req;
23 int err; 23 int err;
24 24
25 req = fuse_get_request(fc); 25 req = fuse_get_req(fc);
26 if (!req) 26 if (IS_ERR(req))
27 return -EINTR; 27 return PTR_ERR(req);
28 28
29 memset(&inarg, 0, sizeof(inarg)); 29 memset(&inarg, 0, sizeof(inarg));
30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -184,9 +184,9 @@ static int fuse_flush(struct file *file)
184 if (fc->no_flush) 184 if (fc->no_flush)
185 return 0; 185 return 0;
186 186
187 req = fuse_get_request(fc); 187 req = fuse_get_req(fc);
188 if (!req) 188 if (IS_ERR(req))
189 return -EINTR; 189 return PTR_ERR(req);
190 190
191 memset(&inarg, 0, sizeof(inarg)); 191 memset(&inarg, 0, sizeof(inarg));
192 inarg.fh = ff->fh; 192 inarg.fh = ff->fh;
@@ -223,9 +223,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
224 return 0; 224 return 0;
225 225
226 req = fuse_get_request(fc); 226 req = fuse_get_req(fc);
227 if (!req) 227 if (IS_ERR(req))
228 return -EINTR; 228 return PTR_ERR(req);
229 229
230 memset(&inarg, 0, sizeof(inarg)); 230 memset(&inarg, 0, sizeof(inarg));
231 inarg.fh = ff->fh; 231 inarg.fh = ff->fh;
@@ -297,9 +297,9 @@ static int fuse_readpage(struct file *file, struct page *page)
297 if (is_bad_inode(inode)) 297 if (is_bad_inode(inode))
298 goto out; 298 goto out;
299 299
300 err = -EINTR; 300 req = fuse_get_req(fc);
301 req = fuse_get_request(fc); 301 err = PTR_ERR(req);
302 if (!req) 302 if (IS_ERR(req))
303 goto out; 303 goto out;
304 304
305 req->out.page_zeroing = 1; 305 req->out.page_zeroing = 1;
@@ -368,10 +368,10 @@ static int fuse_readpages_fill(void *_data, struct page *page)
368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
369 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 369 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
370 fuse_send_readpages(req, data->file, inode); 370 fuse_send_readpages(req, data->file, inode);
371 data->req = req = fuse_get_request(fc); 371 data->req = req = fuse_get_req(fc);
372 if (!req) { 372 if (IS_ERR(req)) {
373 unlock_page(page); 373 unlock_page(page);
374 return -EINTR; 374 return PTR_ERR(req);
375 } 375 }
376 } 376 }
377 req->pages[req->num_pages] = page; 377 req->pages[req->num_pages] = page;
@@ -392,13 +392,17 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
392 392
393 data.file = file; 393 data.file = file;
394 data.inode = inode; 394 data.inode = inode;
395 data.req = fuse_get_request(fc); 395 data.req = fuse_get_req(fc);
396 if (!data.req) 396 if (IS_ERR(data.req))
397 return -EINTR; 397 return PTR_ERR(data.req);
398 398
399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
400 if (!err) 400 if (!err) {
401 fuse_send_readpages(data.req, file, inode); 401 if (data.req->num_pages)
402 fuse_send_readpages(data.req, file, inode);
403 else
404 fuse_put_request(fc, data.req);
405 }
402 return err; 406 return err;
403} 407}
404 408
@@ -451,9 +455,9 @@ static int fuse_commit_write(struct file *file, struct page *page,
451 if (is_bad_inode(inode)) 455 if (is_bad_inode(inode))
452 return -EIO; 456 return -EIO;
453 457
454 req = fuse_get_request(fc); 458 req = fuse_get_req(fc);
455 if (!req) 459 if (IS_ERR(req))
456 return -EINTR; 460 return PTR_ERR(req);
457 461
458 req->num_pages = 1; 462 req->num_pages = 1;
459 req->pages[0] = page; 463 req->pages[0] = page;
@@ -528,9 +532,9 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
528 if (is_bad_inode(inode)) 532 if (is_bad_inode(inode))
529 return -EIO; 533 return -EIO;
530 534
531 req = fuse_get_request(fc); 535 req = fuse_get_req(fc);
532 if (!req) 536 if (IS_ERR(req))
533 return -EINTR; 537 return PTR_ERR(req);
534 538
535 while (count) { 539 while (count) {
536 size_t nres; 540 size_t nres;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a16a04fcf41e..19c7185a7546 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -18,8 +18,8 @@
18/** Max number of pages that can be used in a single read request */ 18/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32 19#define FUSE_MAX_PAGES_PER_REQ 32
20 20
21/** If more requests are outstanding, then the operation will block */ 21/** Maximum number of outstanding background requests */
22#define FUSE_MAX_OUTSTANDING 10 22#define FUSE_MAX_BACKGROUND 10
23 23
24/** It could be as large as PATH_MAX, but would that have any uses? */ 24/** It could be as large as PATH_MAX, but would that have any uses? */
25#define FUSE_NAME_MAX 1024 25#define FUSE_NAME_MAX 1024
@@ -131,8 +131,8 @@ struct fuse_conn;
131 * A request to the client 131 * A request to the client
132 */ 132 */
133struct fuse_req { 133struct fuse_req {
134 /** This can be on either unused_list, pending processing or 134 /** This can be on either pending processing or io lists in
135 io lists in fuse_conn */ 135 fuse_conn */
136 struct list_head list; 136 struct list_head list;
137 137
138 /** Entry on the background list */ 138 /** Entry on the background list */
@@ -144,15 +144,12 @@ struct fuse_req {
144 /* 144 /*
145 * The following bitfields are either set once before the 145 * The following bitfields are either set once before the
146 * request is queued or setting/clearing them is protected by 146 * request is queued or setting/clearing them is protected by
147 * fuse_lock 147 * fuse_conn->lock
148 */ 148 */
149 149
150 /** True if the request has reply */ 150 /** True if the request has reply */
151 unsigned isreply:1; 151 unsigned isreply:1;
152 152
153 /** The request is preallocated */
154 unsigned preallocated:1;
155
156 /** The request was interrupted */ 153 /** The request was interrupted */
157 unsigned interrupted:1; 154 unsigned interrupted:1;
158 155
@@ -213,6 +210,9 @@ struct fuse_req {
213 * unmounted. 210 * unmounted.
214 */ 211 */
215struct fuse_conn { 212struct fuse_conn {
213 /** Lock protecting accessess to members of this structure */
214 spinlock_t lock;
215
216 /** The user id for this mount */ 216 /** The user id for this mount */
217 uid_t user_id; 217 uid_t user_id;
218 218
@@ -244,19 +244,20 @@ struct fuse_conn {
244 interrupted request) */ 244 interrupted request) */
245 struct list_head background; 245 struct list_head background;
246 246
247 /** Controls the maximum number of outstanding requests */ 247 /** Number of requests currently in the background */
248 struct semaphore outstanding_sem; 248 unsigned num_background;
249
250 /** Flag indicating if connection is blocked. This will be
251 the case before the INIT reply is received, and if there
252 are too many outstading backgrounds requests */
253 int blocked;
249 254
250 /** This counts the number of outstanding requests if 255 /** waitq for blocked connection */
251 outstanding_sem would go negative */ 256 wait_queue_head_t blocked_waitq;
252 unsigned outstanding_debt;
253 257
254 /** RW semaphore for exclusion with fuse_put_super() */ 258 /** RW semaphore for exclusion with fuse_put_super() */
255 struct rw_semaphore sbput_sem; 259 struct rw_semaphore sbput_sem;
256 260
257 /** The list of unused requests */
258 struct list_head unused_list;
259
260 /** The next unique request id */ 261 /** The next unique request id */
261 u64 reqctr; 262 u64 reqctr;
262 263
@@ -318,6 +319,9 @@ struct fuse_conn {
318 319
319 /** kobject */ 320 /** kobject */
320 struct kobject kobj; 321 struct kobject kobj;
322
323 /** O_ASYNC requests */
324 struct fasync_struct *fasync;
321}; 325};
322 326
323static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 327static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -349,21 +353,6 @@ static inline u64 get_node_id(struct inode *inode)
349extern const struct file_operations fuse_dev_operations; 353extern const struct file_operations fuse_dev_operations;
350 354
351/** 355/**
352 * This is the single global spinlock which protects FUSE's structures
353 *
354 * The following data is protected by this lock:
355 *
356 * - the private_data field of the device file
357 * - the s_fs_info field of the super block
358 * - unused_list, pending, processing lists in fuse_conn
359 * - background list in fuse_conn
360 * - the unique request ID counter reqctr in fuse_conn
361 * - the sb (super_block) field in fuse_conn
362 * - the file (device file) field in fuse_conn
363 */
364extern spinlock_t fuse_lock;
365
366/**
367 * Get a filled in inode 356 * Get a filled in inode
368 */ 357 */
369struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 358struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
@@ -461,11 +450,11 @@ void fuse_reset_request(struct fuse_req *req);
461/** 450/**
462 * Reserve a preallocated request 451 * Reserve a preallocated request
463 */ 452 */
464struct fuse_req *fuse_get_request(struct fuse_conn *fc); 453struct fuse_req *fuse_get_req(struct fuse_conn *fc);
465 454
466/** 455/**
467 * Decrement reference count of a request. If count goes to zero put 456 * Decrement reference count of a request. If count goes to zero free
468 * on unused list (preallocated) or free request (not preallocated). 457 * the request.
469 */ 458 */
470void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); 459void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
471 460
@@ -487,7 +476,7 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
487/** 476/**
488 * Release inodes and file associated with background request 477 * Release inodes and file associated with background request
489 */ 478 */
490void fuse_release_background(struct fuse_req *req); 479void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
491 480
492/* Abort all requests */ 481/* Abort all requests */
493void fuse_abort_conn(struct fuse_conn *fc); 482void fuse_abort_conn(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 879e6fba9480..fd34037b0588 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -22,7 +22,6 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace"); 22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24 24
25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep; 25static kmem_cache_t *fuse_inode_cachep;
27static struct subsystem connections_subsys; 26static struct subsystem connections_subsys;
28 27
@@ -207,15 +206,17 @@ static void fuse_put_super(struct super_block *sb)
207 206
208 down_write(&fc->sbput_sem); 207 down_write(&fc->sbput_sem);
209 while (!list_empty(&fc->background)) 208 while (!list_empty(&fc->background))
210 fuse_release_background(list_entry(fc->background.next, 209 fuse_release_background(fc,
210 list_entry(fc->background.next,
211 struct fuse_req, bg_entry)); 211 struct fuse_req, bg_entry));
212 212
213 spin_lock(&fuse_lock); 213 spin_lock(&fc->lock);
214 fc->mounted = 0; 214 fc->mounted = 0;
215 fc->connected = 0; 215 fc->connected = 0;
216 spin_unlock(&fuse_lock); 216 spin_unlock(&fc->lock);
217 up_write(&fc->sbput_sem); 217 up_write(&fc->sbput_sem);
218 /* Flush all readers on this fs */ 218 /* Flush all readers on this fs */
219 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
219 wake_up_all(&fc->waitq); 220 wake_up_all(&fc->waitq);
220 kobject_del(&fc->kobj); 221 kobject_del(&fc->kobj);
221 kobject_put(&fc->kobj); 222 kobject_put(&fc->kobj);
@@ -242,9 +243,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
242 struct fuse_statfs_out outarg; 243 struct fuse_statfs_out outarg;
243 int err; 244 int err;
244 245
245 req = fuse_get_request(fc); 246 req = fuse_get_req(fc);
246 if (!req) 247 if (IS_ERR(req))
247 return -EINTR; 248 return PTR_ERR(req);
248 249
249 memset(&outarg, 0, sizeof(outarg)); 250 memset(&outarg, 0, sizeof(outarg));
250 req->in.numargs = 0; 251 req->in.numargs = 0;
@@ -369,15 +370,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
369 370
370static void fuse_conn_release(struct kobject *kobj) 371static void fuse_conn_release(struct kobject *kobj)
371{ 372{
372 struct fuse_conn *fc = get_fuse_conn_kobj(kobj); 373 kfree(get_fuse_conn_kobj(kobj));
373
374 while (!list_empty(&fc->unused_list)) {
375 struct fuse_req *req;
376 req = list_entry(fc->unused_list.next, struct fuse_req, list);
377 list_del(&req->list);
378 fuse_request_free(req);
379 }
380 kfree(fc);
381} 374}
382 375
383static struct fuse_conn *new_conn(void) 376static struct fuse_conn *new_conn(void)
@@ -386,64 +379,25 @@ static struct fuse_conn *new_conn(void)
386 379
387 fc = kzalloc(sizeof(*fc), GFP_KERNEL); 380 fc = kzalloc(sizeof(*fc), GFP_KERNEL);
388 if (fc) { 381 if (fc) {
389 int i; 382 spin_lock_init(&fc->lock);
390 init_waitqueue_head(&fc->waitq); 383 init_waitqueue_head(&fc->waitq);
384 init_waitqueue_head(&fc->blocked_waitq);
391 INIT_LIST_HEAD(&fc->pending); 385 INIT_LIST_HEAD(&fc->pending);
392 INIT_LIST_HEAD(&fc->processing); 386 INIT_LIST_HEAD(&fc->processing);
393 INIT_LIST_HEAD(&fc->io); 387 INIT_LIST_HEAD(&fc->io);
394 INIT_LIST_HEAD(&fc->unused_list);
395 INIT_LIST_HEAD(&fc->background); 388 INIT_LIST_HEAD(&fc->background);
396 sema_init(&fc->outstanding_sem, 1); /* One for INIT */
397 init_rwsem(&fc->sbput_sem); 389 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys); 390 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj); 391 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0); 392 atomic_set(&fc->num_waiting, 0);
401 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
402 struct fuse_req *req = fuse_request_alloc();
403 if (!req) {
404 kobject_put(&fc->kobj);
405 return NULL;
406 }
407 list_add(&req->list, &fc->unused_list);
408 }
409 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 393 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
410 fc->bdi.unplug_io_fn = default_unplug_io_fn; 394 fc->bdi.unplug_io_fn = default_unplug_io_fn;
411 fc->reqctr = 0; 395 fc->reqctr = 0;
396 fc->blocked = 1;
412 } 397 }
413 return fc; 398 return fc;
414} 399}
415 400
416static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
417{
418 struct fuse_conn *fc;
419 int err;
420
421 err = -EINVAL;
422 if (file->f_op != &fuse_dev_operations)
423 goto out_err;
424
425 err = -ENOMEM;
426 fc = new_conn();
427 if (!fc)
428 goto out_err;
429
430 spin_lock(&fuse_lock);
431 err = -EINVAL;
432 if (file->private_data)
433 goto out_unlock;
434
435 kobject_get(&fc->kobj);
436 file->private_data = fc;
437 spin_unlock(&fuse_lock);
438 return fc;
439
440 out_unlock:
441 spin_unlock(&fuse_lock);
442 kobject_put(&fc->kobj);
443 out_err:
444 return ERR_PTR(err);
445}
446
447static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 401static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
448{ 402{
449 struct fuse_attr attr; 403 struct fuse_attr attr;
@@ -467,7 +421,6 @@ static struct super_operations fuse_super_operations = {
467 421
468static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 422static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
469{ 423{
470 int i;
471 struct fuse_init_out *arg = &req->misc.init_out; 424 struct fuse_init_out *arg = &req->misc.init_out;
472 425
473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) 426 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
@@ -486,22 +439,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
486 fc->minor = arg->minor; 439 fc->minor = arg->minor;
487 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 440 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
488 } 441 }
489
490 /* After INIT reply is received other requests can go
491 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
492 up()s on outstanding_sem. The last up() is done in
493 fuse_putback_request() */
494 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
495 up(&fc->outstanding_sem);
496
497 fuse_put_request(fc, req); 442 fuse_put_request(fc, req);
443 fc->blocked = 0;
444 wake_up_all(&fc->blocked_waitq);
498} 445}
499 446
500static void fuse_send_init(struct fuse_conn *fc) 447static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
501{ 448{
502 /* This is called from fuse_read_super() so there's guaranteed
503 to be exactly one request available */
504 struct fuse_req *req = fuse_get_request(fc);
505 struct fuse_init_in *arg = &req->misc.init_in; 449 struct fuse_init_in *arg = &req->misc.init_in;
506 450
507 arg->major = FUSE_KERNEL_VERSION; 451 arg->major = FUSE_KERNEL_VERSION;
@@ -525,12 +469,9 @@ static void fuse_send_init(struct fuse_conn *fc)
525 469
526static unsigned long long conn_id(void) 470static unsigned long long conn_id(void)
527{ 471{
472 /* BKL is held for ->get_sb() */
528 static unsigned long long ctr = 1; 473 static unsigned long long ctr = 1;
529 unsigned long long val; 474 return ctr++;
530 spin_lock(&fuse_lock);
531 val = ctr++;
532 spin_unlock(&fuse_lock);
533 return val;
534} 475}
535 476
536static int fuse_fill_super(struct super_block *sb, void *data, int silent) 477static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -540,6 +481,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
540 struct fuse_mount_data d; 481 struct fuse_mount_data d;
541 struct file *file; 482 struct file *file;
542 struct dentry *root_dentry; 483 struct dentry *root_dentry;
484 struct fuse_req *init_req;
543 int err; 485 int err;
544 486
545 if (!parse_fuse_opt((char *) data, &d)) 487 if (!parse_fuse_opt((char *) data, &d))
@@ -555,10 +497,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
555 if (!file) 497 if (!file)
556 return -EINVAL; 498 return -EINVAL;
557 499
558 fc = get_conn(file, sb); 500 if (file->f_op != &fuse_dev_operations)
559 fput(file); 501 return -EINVAL;
560 if (IS_ERR(fc)) 502
561 return PTR_ERR(fc); 503 /* Setting file->private_data can't race with other mount()
504 instances, since BKL is held for ->get_sb() */
505 if (file->private_data)
506 return -EINVAL;
507
508 fc = new_conn();
509 if (!fc)
510 return -ENOMEM;
562 511
563 fc->flags = d.flags; 512 fc->flags = d.flags;
564 fc->user_id = d.user_id; 513 fc->user_id = d.user_id;
@@ -579,27 +528,40 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
579 goto err; 528 goto err;
580 } 529 }
581 530
531 init_req = fuse_request_alloc();
532 if (!init_req)
533 goto err_put_root;
534
582 err = kobject_set_name(&fc->kobj, "%llu", conn_id()); 535 err = kobject_set_name(&fc->kobj, "%llu", conn_id());
583 if (err) 536 if (err)
584 goto err_put_root; 537 goto err_free_req;
585 538
586 err = kobject_add(&fc->kobj); 539 err = kobject_add(&fc->kobj);
587 if (err) 540 if (err)
588 goto err_put_root; 541 goto err_free_req;
589 542
590 sb->s_root = root_dentry; 543 sb->s_root = root_dentry;
591 spin_lock(&fuse_lock);
592 fc->mounted = 1; 544 fc->mounted = 1;
593 fc->connected = 1; 545 fc->connected = 1;
594 spin_unlock(&fuse_lock); 546 kobject_get(&fc->kobj);
547 file->private_data = fc;
548 /*
549 * atomic_dec_and_test() in fput() provides the necessary
550 * memory barrier for file->private_data to be visible on all
551 * CPUs after this
552 */
553 fput(file);
595 554
596 fuse_send_init(fc); 555 fuse_send_init(fc, init_req);
597 556
598 return 0; 557 return 0;
599 558
559 err_free_req:
560 fuse_request_free(init_req);
600 err_put_root: 561 err_put_root:
601 dput(root_dentry); 562 dput(root_dentry);
602 err: 563 err:
564 fput(file);
603 kobject_put(&fc->kobj); 565 kobject_put(&fc->kobj);
604 return err; 566 return err;
605} 567}
@@ -753,7 +715,6 @@ static int __init fuse_init(void)
753 printk("fuse init (API version %i.%i)\n", 715 printk("fuse init (API version %i.%i)\n",
754 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 716 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
755 717
756 spin_lock_init(&fuse_lock);
757 res = fuse_fs_init(); 718 res = fuse_fs_init();
758 if (res) 719 if (res)
759 goto err; 720 goto err;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 8f07e8fbd03d..746abc9ecf70 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -466,8 +466,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node)
466 for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; 466 for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)];
467 *p && *p != node; p = &(*p)->next_hash) 467 *p && *p != node; p = &(*p)->next_hash)
468 ; 468 ;
469 if (!*p) 469 BUG_ON(!*p);
470 BUG();
471 *p = node->next_hash; 470 *p = node->next_hash;
472 node->tree->node_hash_cnt--; 471 node->tree->node_hash_cnt--;
473} 472}
@@ -622,8 +621,7 @@ void hfs_bnode_put(struct hfs_bnode *node)
622 621
623 dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", 622 dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n",
624 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 623 node->tree->cnid, node->this, atomic_read(&node->refcnt));
625 if (!atomic_read(&node->refcnt)) 624 BUG_ON(!atomic_read(&node->refcnt));
626 BUG();
627 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) 625 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
628 return; 626 return;
629 for (i = 0; i < tree->pages_per_bnode; i++) { 627 for (i = 0; i < tree->pages_per_bnode; i++) {
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index a67edfa34e9e..effa8991999c 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -269,8 +269,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
269 u8 *data, byte, m; 269 u8 *data, byte, m;
270 270
271 dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this); 271 dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this);
272 if (!node->this) 272 BUG_ON(!node->this);
273 BUG();
274 tree = node->tree; 273 tree = node->tree;
275 nidx = node->this; 274 nidx = node->this;
276 node = hfs_bnode_find(tree, 0); 275 node = hfs_bnode_find(tree, 0);
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 2ba20cdb5baa..5e6363be246f 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -216,10 +216,10 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
216static struct inode_operations hppfs_file_iops = { 216static struct inode_operations hppfs_file_iops = {
217}; 217};
218 218
219static ssize_t read_proc(struct file *file, char *buf, ssize_t count, 219static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
220 loff_t *ppos, int is_user) 220 loff_t *ppos, int is_user)
221{ 221{
222 ssize_t (*read)(struct file *, char *, size_t, loff_t *); 222 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
223 ssize_t n; 223 ssize_t n;
224 224
225 read = file->f_dentry->d_inode->i_fop->read; 225 read = file->f_dentry->d_inode->i_fop->read;
@@ -236,7 +236,7 @@ static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
236 return n; 236 return n;
237} 237}
238 238
239static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) 239static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count)
240{ 240{
241 ssize_t n; 241 ssize_t n;
242 int cur, err; 242 int cur, err;
@@ -274,7 +274,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
274 return n; 274 return n;
275} 275}
276 276
277static ssize_t hppfs_read(struct file *file, char *buf, size_t count, 277static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
278 loff_t *ppos) 278 loff_t *ppos)
279{ 279{
280 struct hppfs_private *hppfs = file->private_data; 280 struct hppfs_private *hppfs = file->private_data;
@@ -313,12 +313,12 @@ static ssize_t hppfs_read(struct file *file, char *buf, size_t count,
313 return(count); 313 return(count);
314} 314}
315 315
316static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, 316static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len,
317 loff_t *ppos) 317 loff_t *ppos)
318{ 318{
319 struct hppfs_private *data = file->private_data; 319 struct hppfs_private *data = file->private_data;
320 struct file *proc_file = data->proc_file; 320 struct file *proc_file = data->proc_file;
321 ssize_t (*write)(struct file *, const char *, size_t, loff_t *); 321 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
322 int err; 322 int err;
323 323
324 write = proc_file->f_dentry->d_inode->i_fop->write; 324 write = proc_file->f_dentry->d_inode->i_fop->write;
@@ -658,7 +658,7 @@ static struct super_operations hppfs_sbops = {
658 .statfs = hppfs_statfs, 658 .statfs = hppfs_statfs,
659}; 659};
660 660
661static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) 661static int hppfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
662{ 662{
663 struct file *proc_file; 663 struct file *proc_file;
664 struct dentry *proc_dentry; 664 struct dentry *proc_dentry;
diff --git a/fs/inode.c b/fs/inode.c
index 32b7c3375021..3a2446a27d2c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -172,8 +172,7 @@ static struct inode *alloc_inode(struct super_block *sb)
172 172
173void destroy_inode(struct inode *inode) 173void destroy_inode(struct inode *inode)
174{ 174{
175 if (inode_has_buffers(inode)) 175 BUG_ON(inode_has_buffers(inode));
176 BUG();
177 security_inode_free(inode); 176 security_inode_free(inode);
178 if (inode->i_sb->s_op->destroy_inode) 177 if (inode->i_sb->s_op->destroy_inode)
179 inode->i_sb->s_op->destroy_inode(inode); 178 inode->i_sb->s_op->destroy_inode(inode);
@@ -249,12 +248,9 @@ void clear_inode(struct inode *inode)
249 might_sleep(); 248 might_sleep();
250 invalidate_inode_buffers(inode); 249 invalidate_inode_buffers(inode);
251 250
252 if (inode->i_data.nrpages) 251 BUG_ON(inode->i_data.nrpages);
253 BUG(); 252 BUG_ON(!(inode->i_state & I_FREEING));
254 if (!(inode->i_state & I_FREEING)) 253 BUG_ON(inode->i_state & I_CLEAR);
255 BUG();
256 if (inode->i_state & I_CLEAR)
257 BUG();
258 wait_on_inode(inode); 254 wait_on_inode(inode);
259 DQUOT_DROP(inode); 255 DQUOT_DROP(inode);
260 if (inode->i_sb && inode->i_sb->s_op->clear_inode) 256 if (inode->i_sb && inode->i_sb->s_op->clear_inode)
@@ -1054,8 +1050,7 @@ void generic_delete_inode(struct inode *inode)
1054 hlist_del_init(&inode->i_hash); 1050 hlist_del_init(&inode->i_hash);
1055 spin_unlock(&inode_lock); 1051 spin_unlock(&inode_lock);
1056 wake_up_inode(inode); 1052 wake_up_inode(inode);
1057 if (inode->i_state != I_CLEAR) 1053 BUG_ON(inode->i_state != I_CLEAR);
1058 BUG();
1059 destroy_inode(inode); 1054 destroy_inode(inode);
1060} 1055}
1061 1056
diff --git a/fs/inotify.c b/fs/inotify.c
index 367c487c014b..1f50302849c5 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -538,7 +538,7 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); 538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
539 spin_lock(&entry->d_lock); 539 spin_lock(&entry->d_lock);
540 parent = entry->d_parent; 540 parent = entry->d_parent;
541 if (inotify_inode_watched(parent->d_inode)) 541 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; 542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
543 spin_unlock(&entry->d_lock); 543 spin_unlock(&entry->d_lock);
544} 544}
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 7b77a9541125..ff2a872e80e7 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -35,8 +35,7 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
35 pid_t pid; 35 pid_t pid;
36 int ret = 0; 36 int ret = 0;
37 37
38 if (c->gc_task) 38 BUG_ON(c->gc_task);
39 BUG();
40 39
41 init_completion(&c->gc_thread_start); 40 init_completion(&c->gc_thread_start);
42 init_completion(&c->gc_thread_exit); 41 init_completion(&c->gc_thread_exit);
diff --git a/fs/locks.c b/fs/locks.c
index 4d9e71d43e7e..dda83d6cd48b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -168,18 +168,9 @@ static void locks_release_private(struct file_lock *fl)
168/* Free a lock which is not in use. */ 168/* Free a lock which is not in use. */
169static void locks_free_lock(struct file_lock *fl) 169static void locks_free_lock(struct file_lock *fl)
170{ 170{
171 if (fl == NULL) { 171 BUG_ON(waitqueue_active(&fl->fl_wait));
172 BUG(); 172 BUG_ON(!list_empty(&fl->fl_block));
173 return; 173 BUG_ON(!list_empty(&fl->fl_link));
174 }
175 if (waitqueue_active(&fl->fl_wait))
176 panic("Attempting to free lock with active wait queue");
177
178 if (!list_empty(&fl->fl_block))
179 panic("Attempting to free lock with active block list");
180
181 if (!list_empty(&fl->fl_link))
182 panic("Attempting to free lock on active lock list");
183 174
184 locks_release_private(fl); 175 locks_release_private(fl);
185 kmem_cache_free(filelock_cache, fl); 176 kmem_cache_free(filelock_cache, fl);
@@ -735,8 +726,9 @@ EXPORT_SYMBOL(posix_locks_deadlock);
735 * at the head of the list, but that's secret knowledge known only to 726 * at the head of the list, but that's secret knowledge known only to
736 * flock_lock_file and posix_lock_file. 727 * flock_lock_file and posix_lock_file.
737 */ 728 */
738static int flock_lock_file(struct file *filp, struct file_lock *new_fl) 729static int flock_lock_file(struct file *filp, struct file_lock *request)
739{ 730{
731 struct file_lock *new_fl = NULL;
740 struct file_lock **before; 732 struct file_lock **before;
741 struct inode * inode = filp->f_dentry->d_inode; 733 struct inode * inode = filp->f_dentry->d_inode;
742 int error = 0; 734 int error = 0;
@@ -751,17 +743,19 @@ static int flock_lock_file(struct file *filp, struct file_lock *new_fl)
751 continue; 743 continue;
752 if (filp != fl->fl_file) 744 if (filp != fl->fl_file)
753 continue; 745 continue;
754 if (new_fl->fl_type == fl->fl_type) 746 if (request->fl_type == fl->fl_type)
755 goto out; 747 goto out;
756 found = 1; 748 found = 1;
757 locks_delete_lock(before); 749 locks_delete_lock(before);
758 break; 750 break;
759 } 751 }
760 unlock_kernel();
761 752
762 if (new_fl->fl_type == F_UNLCK) 753 if (request->fl_type == F_UNLCK)
763 return 0; 754 goto out;
764 755
756 new_fl = locks_alloc_lock();
757 if (new_fl == NULL)
758 goto out;
765 /* 759 /*
766 * If a higher-priority process was blocked on the old file lock, 760 * If a higher-priority process was blocked on the old file lock,
767 * give it the opportunity to lock the file. 761 * give it the opportunity to lock the file.
@@ -769,26 +763,27 @@ static int flock_lock_file(struct file *filp, struct file_lock *new_fl)
769 if (found) 763 if (found)
770 cond_resched(); 764 cond_resched();
771 765
772 lock_kernel();
773 for_each_lock(inode, before) { 766 for_each_lock(inode, before) {
774 struct file_lock *fl = *before; 767 struct file_lock *fl = *before;
775 if (IS_POSIX(fl)) 768 if (IS_POSIX(fl))
776 break; 769 break;
777 if (IS_LEASE(fl)) 770 if (IS_LEASE(fl))
778 continue; 771 continue;
779 if (!flock_locks_conflict(new_fl, fl)) 772 if (!flock_locks_conflict(request, fl))
780 continue; 773 continue;
781 error = -EAGAIN; 774 error = -EAGAIN;
782 if (new_fl->fl_flags & FL_SLEEP) { 775 if (request->fl_flags & FL_SLEEP)
783 locks_insert_block(fl, new_fl); 776 locks_insert_block(fl, request);
784 }
785 goto out; 777 goto out;
786 } 778 }
779 locks_copy_lock(new_fl, request);
787 locks_insert_lock(&inode->i_flock, new_fl); 780 locks_insert_lock(&inode->i_flock, new_fl);
788 error = 0; 781 new_fl = NULL;
789 782
790out: 783out:
791 unlock_kernel(); 784 unlock_kernel();
785 if (new_fl)
786 locks_free_lock(new_fl);
792 return error; 787 return error;
793} 788}
794 789
@@ -1569,9 +1564,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1569 error = flock_lock_file_wait(filp, lock); 1564 error = flock_lock_file_wait(filp, lock);
1570 1565
1571 out_free: 1566 out_free:
1572 if (list_empty(&lock->fl_link)) { 1567 locks_free_lock(lock);
1573 locks_free_lock(lock);
1574 }
1575 1568
1576 out_putf: 1569 out_putf:
1577 fput(filp); 1570 fput(filp);
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 626a367bcd81..5b76ccd19e3f 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -12,14 +12,6 @@
12#include <linux/msdos_fs.h> 12#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14 14
15/* MS-DOS "device special files" */
16static const unsigned char *reserved_names[] = {
17 "CON ", "PRN ", "NUL ", "AUX ",
18 "LPT1 ", "LPT2 ", "LPT3 ", "LPT4 ",
19 "COM1 ", "COM2 ", "COM3 ", "COM4 ",
20 NULL
21};
22
23/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
24static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
25static unsigned char bad_if_strict_pc[] = "+=,; "; 17static unsigned char bad_if_strict_pc[] = "+=,; ";
@@ -40,7 +32,6 @@ static int msdos_format_name(const unsigned char *name, int len,
40 */ 32 */
41{ 33{
42 unsigned char *walk; 34 unsigned char *walk;
43 const unsigned char **reserved;
44 unsigned char c; 35 unsigned char c;
45 int space; 36 int space;
46 37
@@ -127,11 +118,7 @@ static int msdos_format_name(const unsigned char *name, int len,
127 } 118 }
128 while (walk - res < MSDOS_NAME) 119 while (walk - res < MSDOS_NAME)
129 *walk++ = ' '; 120 *walk++ = ' ';
130 if (!opts->atari) 121
131 /* GEMDOS is less stupid and has no reserved names */
132 for (reserved = reserved_names; *reserved; reserved++)
133 if (!strncmp(res, *reserved, 8))
134 return -EINVAL;
135 return 0; 122 return 0;
136} 123}
137 124
diff --git a/fs/namei.c b/fs/namei.c
index 22f6e8d16aa8..96723ae83c89 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1254,7 +1254,7 @@ out:
1254 return dentry; 1254 return dentry;
1255} 1255}
1256 1256
1257struct dentry * lookup_hash(struct nameidata *nd) 1257static struct dentry *lookup_hash(struct nameidata *nd)
1258{ 1258{
1259 return __lookup_hash(&nd->last, nd->dentry, nd); 1259 return __lookup_hash(&nd->last, nd->dentry, nd);
1260} 1260}
@@ -2697,7 +2697,6 @@ EXPORT_SYMBOL(follow_up);
2697EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2697EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
2698EXPORT_SYMBOL(getname); 2698EXPORT_SYMBOL(getname);
2699EXPORT_SYMBOL(lock_rename); 2699EXPORT_SYMBOL(lock_rename);
2700EXPORT_SYMBOL(lookup_hash);
2701EXPORT_SYMBOL(lookup_one_len); 2700EXPORT_SYMBOL(lookup_one_len);
2702EXPORT_SYMBOL(page_follow_link_light); 2701EXPORT_SYMBOL(page_follow_link_light);
2703EXPORT_SYMBOL(page_put_link); 2702EXPORT_SYMBOL(page_put_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb852..2c5f1f80bdc2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,11 +899,13 @@ static int do_change_type(struct nameidata *nd, int flag)
899/* 899/*
900 * do loopback mount. 900 * do loopback mount.
901 */ 901 */
902static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 902static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
903{ 903{
904 struct nameidata old_nd; 904 struct nameidata old_nd;
905 struct vfsmount *mnt = NULL; 905 struct vfsmount *mnt = NULL;
906 int recurse = flags & MS_REC;
906 int err = mount_is_safe(nd); 907 int err = mount_is_safe(nd);
908
907 if (err) 909 if (err)
908 return err; 910 return err;
909 if (!old_name || !*old_name) 911 if (!old_name || !*old_name)
@@ -937,6 +939,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
937 spin_unlock(&vfsmount_lock); 939 spin_unlock(&vfsmount_lock);
938 release_mounts(&umount_list); 940 release_mounts(&umount_list);
939 } 941 }
942 mnt->mnt_flags = mnt_flags;
940 943
941out: 944out:
942 up_write(&namespace_sem); 945 up_write(&namespace_sem);
@@ -1350,7 +1353,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1350 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1353 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1351 data_page); 1354 data_page);
1352 else if (flags & MS_BIND) 1355 else if (flags & MS_BIND)
1353 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1356 retval = do_loopback(&nd, dev_name, flags, mnt_flags);
1354 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1357 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1355 retval = do_change_type(&nd, flags); 1358 retval = do_change_type(&nd, flags);
1356 else if (flags & MS_MOVE) 1359 else if (flags & MS_MOVE)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index cfe9ce881613..6e92b0fe5323 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -14,46 +14,46 @@
14 14
15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) 15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
16{ 16{
17 struct svc_cred *cred = &rqstp->rq_cred; 17 struct svc_cred cred = rqstp->rq_cred;
18 int i; 18 int i;
19 int ret; 19 int ret;
20 20
21 if (exp->ex_flags & NFSEXP_ALLSQUASH) { 21 if (exp->ex_flags & NFSEXP_ALLSQUASH) {
22 cred->cr_uid = exp->ex_anon_uid; 22 cred.cr_uid = exp->ex_anon_uid;
23 cred->cr_gid = exp->ex_anon_gid; 23 cred.cr_gid = exp->ex_anon_gid;
24 put_group_info(cred->cr_group_info); 24 cred.cr_group_info = groups_alloc(0);
25 cred->cr_group_info = groups_alloc(0);
26 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { 25 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
27 struct group_info *gi; 26 struct group_info *gi;
28 if (!cred->cr_uid) 27 if (!cred.cr_uid)
29 cred->cr_uid = exp->ex_anon_uid; 28 cred.cr_uid = exp->ex_anon_uid;
30 if (!cred->cr_gid) 29 if (!cred.cr_gid)
31 cred->cr_gid = exp->ex_anon_gid; 30 cred.cr_gid = exp->ex_anon_gid;
32 gi = groups_alloc(cred->cr_group_info->ngroups); 31 gi = groups_alloc(cred.cr_group_info->ngroups);
33 if (gi) 32 if (gi)
34 for (i = 0; i < cred->cr_group_info->ngroups; i++) { 33 for (i = 0; i < cred.cr_group_info->ngroups; i++) {
35 if (!GROUP_AT(cred->cr_group_info, i)) 34 if (!GROUP_AT(cred.cr_group_info, i))
36 GROUP_AT(gi, i) = exp->ex_anon_gid; 35 GROUP_AT(gi, i) = exp->ex_anon_gid;
37 else 36 else
38 GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i); 37 GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
39 } 38 }
40 put_group_info(cred->cr_group_info); 39 cred.cr_group_info = gi;
41 cred->cr_group_info = gi; 40 } else
42 } 41 get_group_info(cred.cr_group_info);
43 42
44 if (cred->cr_uid != (uid_t) -1) 43 if (cred.cr_uid != (uid_t) -1)
45 current->fsuid = cred->cr_uid; 44 current->fsuid = cred.cr_uid;
46 else 45 else
47 current->fsuid = exp->ex_anon_uid; 46 current->fsuid = exp->ex_anon_uid;
48 if (cred->cr_gid != (gid_t) -1) 47 if (cred.cr_gid != (gid_t) -1)
49 current->fsgid = cred->cr_gid; 48 current->fsgid = cred.cr_gid;
50 else 49 else
51 current->fsgid = exp->ex_anon_gid; 50 current->fsgid = exp->ex_anon_gid;
52 51
53 if (!cred->cr_group_info) 52 if (!cred.cr_group_info)
54 return -ENOMEM; 53 return -ENOMEM;
55 ret = set_current_groups(cred->cr_group_info); 54 ret = set_current_groups(cred.cr_group_info);
56 if ((cred->cr_uid)) { 55 put_group_info(cred.cr_group_info);
56 if ((cred.cr_uid)) {
57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; 57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
58 } else { 58 } else {
59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK & 59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c340be0a3f59..4e0578121d9a 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -422,7 +422,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
423 goto out; 423 goto out;
424 err = path_lookup(buf, 0, &nd); 424 err = path_lookup(buf, 0, &nd);
425 if (err) goto out; 425 if (err) goto out_no_path;
426 426
427 exp.h.flags = 0; 427 exp.h.flags = 0;
428 exp.ex_client = dom; 428 exp.ex_client = dom;
@@ -475,6 +475,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
475 out: 475 out:
476 if (nd.dentry) 476 if (nd.dentry)
477 path_release(&nd); 477 path_release(&nd);
478 out_no_path:
478 if (dom) 479 if (dom)
479 auth_domain_put(dom); 480 auth_domain_put(dom);
480 kfree(buf); 481 kfree(buf);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6d2dfed1de08..f61142afea44 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_procedures3[22] = {
682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), 682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), 683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1),
684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), 684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE), 685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), 686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4),
687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 7391f4aabedb..edb107e61b91 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -710,9 +710,9 @@ calculate_posix_ace_count(struct nfs4_acl *n4acl)
710 /* Also, the remaining entries are for named users and 710 /* Also, the remaining entries are for named users and
711 * groups, and come in threes (mask, allow, deny): */ 711 * groups, and come in threes (mask, allow, deny): */
712 if (n4acl->naces < 7) 712 if (n4acl->naces < 7)
713 return -1; 713 return -EINVAL;
714 if ((n4acl->naces - 7) % 3) 714 if ((n4acl->naces - 7) % 3)
715 return -1; 715 return -EINVAL;
716 return 4 + (n4acl->naces - 7)/3; 716 return 4 + (n4acl->naces - 7)/3;
717 } 717 }
718} 718}
@@ -790,7 +790,7 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
790 continue; 790 continue;
791 791
792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, 792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
793 ace->access_mask, ace->whotype, ace->who) == -1; 793 ace->access_mask, ace->whotype, ace->who);
794 if (error < 0) 794 if (error < 0)
795 goto out; 795 goto out;
796 796
@@ -866,7 +866,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
866 struct nfs4_ace *ace; 866 struct nfs4_ace *ace;
867 867
868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) 868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
869 return -1; 869 return -ENOMEM;
870 870
871 ace->type = type; 871 ace->type = type;
872 ace->flag = flag; 872 ace->flag = flag;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c872bd07fc10..dbaf3f93f328 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -441,8 +441,9 @@ nfsd4_probe_callback(struct nfs4_client *clp)
441 goto out_clnt; 441 goto out_clnt;
442 } 442 }
443 443
444 /* the task holds a reference to the nfs4_client struct */
445 cb->cb_client = clnt; 444 cb->cb_client = clnt;
445
446 /* the task holds a reference to the nfs4_client struct */
446 atomic_inc(&clp->cl_count); 447 atomic_inc(&clp->cl_count);
447 448
448 msg.rpc_cred = nfsd4_lookupcred(clp,0); 449 msg.rpc_cred = nfsd4_lookupcred(clp,0);
@@ -460,13 +461,12 @@ nfsd4_probe_callback(struct nfs4_client *clp)
460out_rpciod: 461out_rpciod:
461 atomic_dec(&clp->cl_count); 462 atomic_dec(&clp->cl_count);
462 rpciod_down(); 463 rpciod_down();
464 cb->cb_client = NULL;
463out_clnt: 465out_clnt:
464 rpc_shutdown_client(clnt); 466 rpc_shutdown_client(clnt);
465 goto out_err;
466out_err: 467out_err:
467 dprintk("NFSD: warning: no callback path to client %.*s\n", 468 dprintk("NFSD: warning: no callback path to client %.*s\n",
468 (int)clp->cl_name.len, clp->cl_name.data); 469 (int)clp->cl_name.len, clp->cl_name.data);
469 cb->cb_client = NULL;
470} 470}
471 471
472static void 472static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6d63f1d9e5f5..b0e095ea0c03 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -288,8 +288,6 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
288 fh_put(current_fh); 288 fh_put(current_fh);
289 status = exp_pseudoroot(rqstp->rq_client, current_fh, 289 status = exp_pseudoroot(rqstp->rq_client, current_fh,
290 &rqstp->rq_chandle); 290 &rqstp->rq_chandle);
291 if (!status)
292 status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
293 return status; 291 return status;
294} 292}
295 293
@@ -975,7 +973,7 @@ struct nfsd4_voidargs { int dummy; };
975 */ 973 */
976static struct svc_procedure nfsd_procedures4[2] = { 974static struct svc_procedure nfsd_procedures4[2] = {
977 PROC(null, void, void, void, RC_NOCACHE, 1), 975 PROC(null, void, void, void, RC_NOCACHE, 1),
978 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE) 976 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4)
979}; 977};
980 978
981struct svc_version nfsd_version4 = { 979struct svc_version nfsd_version4 = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47ec112b266c..96c7578cbe1e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -147,6 +147,42 @@ get_nfs4_file(struct nfs4_file *fi)
147 kref_get(&fi->fi_ref); 147 kref_get(&fi->fi_ref);
148} 148}
149 149
150static int num_delegations;
151
152/*
153 * Open owner state (share locks)
154 */
155
156/* hash tables for nfs4_stateowner */
157#define OWNER_HASH_BITS 8
158#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
159#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
160
161#define ownerid_hashval(id) \
162 ((id) & OWNER_HASH_MASK)
163#define ownerstr_hashval(clientid, ownername) \
164 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
165
166static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
167static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
168
169/* hash table for nfs4_file */
170#define FILE_HASH_BITS 8
171#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
172#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
173/* hash table for (open)nfs4_stateid */
174#define STATEID_HASH_BITS 10
175#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
176#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
177
178#define file_hashval(x) \
179 hash_ptr(x, FILE_HASH_BITS)
180#define stateid_hashval(owner_id, file_id) \
181 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
182
183static struct list_head file_hashtbl[FILE_HASH_SIZE];
184static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
185
150static struct nfs4_delegation * 186static struct nfs4_delegation *
151alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 187alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
152{ 188{
@@ -155,9 +191,12 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
155 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 191 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
156 192
157 dprintk("NFSD alloc_init_deleg\n"); 193 dprintk("NFSD alloc_init_deleg\n");
194 if (num_delegations > STATEID_HASH_SIZE * 4)
195 return NULL;
158 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); 196 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
159 if (dp == NULL) 197 if (dp == NULL)
160 return dp; 198 return dp;
199 num_delegations++;
161 INIT_LIST_HEAD(&dp->dl_perfile); 200 INIT_LIST_HEAD(&dp->dl_perfile);
162 INIT_LIST_HEAD(&dp->dl_perclnt); 201 INIT_LIST_HEAD(&dp->dl_perclnt);
163 INIT_LIST_HEAD(&dp->dl_recall_lru); 202 INIT_LIST_HEAD(&dp->dl_recall_lru);
@@ -192,6 +231,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
192 dprintk("NFSD: freeing dp %p\n",dp); 231 dprintk("NFSD: freeing dp %p\n",dp);
193 put_nfs4_file(dp->dl_file); 232 put_nfs4_file(dp->dl_file);
194 kmem_cache_free(deleg_slab, dp); 233 kmem_cache_free(deleg_slab, dp);
234 num_delegations--;
195 } 235 }
196} 236}
197 237
@@ -330,22 +370,29 @@ put_nfs4_client(struct nfs4_client *clp)
330} 370}
331 371
332static void 372static void
373shutdown_callback_client(struct nfs4_client *clp)
374{
375 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
376
377 /* shutdown rpc client, ending any outstanding recall rpcs */
378 if (clnt) {
379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 }
383}
384
385static void
333expire_client(struct nfs4_client *clp) 386expire_client(struct nfs4_client *clp)
334{ 387{
335 struct nfs4_stateowner *sop; 388 struct nfs4_stateowner *sop;
336 struct nfs4_delegation *dp; 389 struct nfs4_delegation *dp;
337 struct nfs4_callback *cb = &clp->cl_callback;
338 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
339 struct list_head reaplist; 390 struct list_head reaplist;
340 391
341 dprintk("NFSD: expire_client cl_count %d\n", 392 dprintk("NFSD: expire_client cl_count %d\n",
342 atomic_read(&clp->cl_count)); 393 atomic_read(&clp->cl_count));
343 394
344 /* shutdown rpc client, ending any outstanding recall rpcs */ 395 shutdown_callback_client(clp);
345 if (atomic_read(&cb->cb_set) == 1 && clnt) {
346 rpc_shutdown_client(clnt);
347 clnt = clp->cl_callback.cb_client = NULL;
348 }
349 396
350 INIT_LIST_HEAD(&reaplist); 397 INIT_LIST_HEAD(&reaplist);
351 spin_lock(&recall_lock); 398 spin_lock(&recall_lock);
@@ -936,40 +983,6 @@ out:
936 return status; 983 return status;
937} 984}
938 985
939/*
940 * Open owner state (share locks)
941 */
942
943/* hash tables for nfs4_stateowner */
944#define OWNER_HASH_BITS 8
945#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
946#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
947
948#define ownerid_hashval(id) \
949 ((id) & OWNER_HASH_MASK)
950#define ownerstr_hashval(clientid, ownername) \
951 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
952
953static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
954static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
955
956/* hash table for nfs4_file */
957#define FILE_HASH_BITS 8
958#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
959#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
960/* hash table for (open)nfs4_stateid */
961#define STATEID_HASH_BITS 10
962#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
963#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
964
965#define file_hashval(x) \
966 hash_ptr(x, FILE_HASH_BITS)
967#define stateid_hashval(owner_id, file_id) \
968 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
969
970static struct list_head file_hashtbl[FILE_HASH_SIZE];
971static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
972
973/* OPEN Share state helper functions */ 986/* OPEN Share state helper functions */
974static inline struct nfs4_file * 987static inline struct nfs4_file *
975alloc_init_file(struct inode *ino) 988alloc_init_file(struct inode *ino)
@@ -1186,8 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1186{ 1199{
1187 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1200 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
1188 1201
1189 unhash_stateowner(sop); 1202 list_move_tail(&sop->so_close_lru, &close_lru);
1190 list_add_tail(&sop->so_close_lru, &close_lru);
1191 sop->so_time = get_seconds(); 1203 sop->so_time = get_seconds();
1192} 1204}
1193 1205
@@ -1916,8 +1928,7 @@ nfs4_laundromat(void)
1916 } 1928 }
1917 dprintk("NFSD: purging unused open stateowner (so_id %d)\n", 1929 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
1918 sop->so_id); 1930 sop->so_id);
1919 list_del(&sop->so_close_lru); 1931 release_stateowner(sop);
1920 nfs4_put_stateowner(sop);
1921 } 1932 }
1922 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) 1933 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
1923 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; 1934 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -2495,36 +2506,27 @@ nfs4_transform_lock_offset(struct file_lock *lock)
2495 lock->fl_end = OFFSET_MAX; 2506 lock->fl_end = OFFSET_MAX;
2496} 2507}
2497 2508
2498static int 2509/* Hack!: For now, we're defining this just so we can use a pointer to it
2499nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) 2510 * as a unique cookie to identify our (NFSv4's) posix locks. */
2500{ 2511static struct lock_manager_operations nfsd_posix_mng_ops = {
2501 struct nfs4_stateowner *local = NULL; 2512};
2502 int status = 0;
2503
2504 if (hashval >= LOCK_HASH_SIZE)
2505 goto out;
2506 list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
2507 if (local == sop) {
2508 status = 1;
2509 goto out;
2510 }
2511 }
2512out:
2513 return status;
2514}
2515
2516 2513
2517static inline void 2514static inline void
2518nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) 2515nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
2519{ 2516{
2520 struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner; 2517 struct nfs4_stateowner *sop;
2521 unsigned int hval = lockownerid_hashval(sop->so_id); 2518 unsigned int hval;
2522 2519
2523 deny->ld_sop = NULL; 2520 if (fl->fl_lmops == &nfsd_posix_mng_ops) {
2524 if (nfs4_verify_lock_stateowner(sop, hval)) { 2521 sop = (struct nfs4_stateowner *) fl->fl_owner;
2522 hval = lockownerid_hashval(sop->so_id);
2525 kref_get(&sop->so_ref); 2523 kref_get(&sop->so_ref);
2526 deny->ld_sop = sop; 2524 deny->ld_sop = sop;
2527 deny->ld_clientid = sop->so_client->cl_clientid; 2525 deny->ld_clientid = sop->so_client->cl_clientid;
2526 } else {
2527 deny->ld_sop = NULL;
2528 deny->ld_clientid.cl_boot = 0;
2529 deny->ld_clientid.cl_id = 0;
2528 } 2530 }
2529 deny->ld_start = fl->fl_start; 2531 deny->ld_start = fl->fl_start;
2530 deny->ld_length = ~(u64)0; 2532 deny->ld_length = ~(u64)0;
@@ -2736,6 +2738,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2736 file_lock.fl_pid = current->tgid; 2738 file_lock.fl_pid = current->tgid;
2737 file_lock.fl_file = filp; 2739 file_lock.fl_file = filp;
2738 file_lock.fl_flags = FL_POSIX; 2740 file_lock.fl_flags = FL_POSIX;
2741 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2739 2742
2740 file_lock.fl_start = lock->lk_offset; 2743 file_lock.fl_start = lock->lk_offset;
2741 if ((lock->lk_length == ~(u64)0) || 2744 if ((lock->lk_length == ~(u64)0) ||
@@ -2841,6 +2844,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2841 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; 2844 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
2842 file_lock.fl_pid = current->tgid; 2845 file_lock.fl_pid = current->tgid;
2843 file_lock.fl_flags = FL_POSIX; 2846 file_lock.fl_flags = FL_POSIX;
2847 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2844 2848
2845 file_lock.fl_start = lockt->lt_offset; 2849 file_lock.fl_start = lockt->lt_offset;
2846 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length)) 2850 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
@@ -2900,6 +2904,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2900 file_lock.fl_pid = current->tgid; 2904 file_lock.fl_pid = current->tgid;
2901 file_lock.fl_file = filp; 2905 file_lock.fl_file = filp;
2902 file_lock.fl_flags = FL_POSIX; 2906 file_lock.fl_flags = FL_POSIX;
2907 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2903 file_lock.fl_start = locku->lu_offset; 2908 file_lock.fl_start = locku->lu_offset;
2904 2909
2905 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length)) 2910 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
@@ -3211,15 +3216,8 @@ __nfs4_state_shutdown(void)
3211 int i; 3216 int i;
3212 struct nfs4_client *clp = NULL; 3217 struct nfs4_client *clp = NULL;
3213 struct nfs4_delegation *dp = NULL; 3218 struct nfs4_delegation *dp = NULL;
3214 struct nfs4_stateowner *sop = NULL;
3215 struct list_head *pos, *next, reaplist; 3219 struct list_head *pos, *next, reaplist;
3216 3220
3217 list_for_each_safe(pos, next, &close_lru) {
3218 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
3219 list_del(&sop->so_close_lru);
3220 nfs4_put_stateowner(sop);
3221 }
3222
3223 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3221 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3224 while (!list_empty(&conf_id_hashtbl[i])) { 3222 while (!list_empty(&conf_id_hashtbl[i])) {
3225 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); 3223 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -3244,8 +3242,6 @@ __nfs4_state_shutdown(void)
3244 } 3242 }
3245 3243
3246 cancel_delayed_work(&laundromat_work); 3244 cancel_delayed_work(&laundromat_work);
3247 flush_workqueue(laundry_wq);
3248 destroy_workqueue(laundry_wq);
3249 nfsd4_shutdown_recdir(); 3245 nfsd4_shutdown_recdir();
3250 nfs4_init = 0; 3246 nfs4_init = 0;
3251} 3247}
@@ -3253,6 +3249,8 @@ __nfs4_state_shutdown(void)
3253void 3249void
3254nfs4_state_shutdown(void) 3250nfs4_state_shutdown(void)
3255{ 3251{
3252 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
3253 destroy_workqueue(laundry_wq);
3256 nfs4_lock_state(); 3254 nfs4_lock_state();
3257 nfs4_release_reclaim(); 3255 nfs4_release_reclaim();
3258 __nfs4_state_shutdown(); 3256 __nfs4_state_shutdown();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 03857fd81126..de3998f15f10 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -299,11 +299,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
299 buf, dummy32, &ace.who); 299 buf, dummy32, &ace.who);
300 if (status) 300 if (status)
301 goto out_nfserr; 301 goto out_nfserr;
302 if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, 302 status = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
303 ace.access_mask, ace.whotype, ace.who) != 0) { 303 ace.access_mask, ace.whotype, ace.who);
304 status = -ENOMEM; 304 if (status)
305 goto out_nfserr; 305 goto out_nfserr;
306 }
307 } 306 }
308 } else 307 } else
309 *acl = NULL; 308 *acl = NULL;
@@ -2085,27 +2084,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2085 WRITE32(eof); 2084 WRITE32(eof);
2086 WRITE32(maxcount); 2085 WRITE32(maxcount);
2087 ADJUST_ARGS(); 2086 ADJUST_ARGS();
2088 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2087 resp->xbuf->head[0].iov_len = (char*)p
2089 2088 - (char*)resp->xbuf->head[0].iov_base;
2090 resp->xbuf->page_len = maxcount; 2089 resp->xbuf->page_len = maxcount;
2091 2090
2092 /* read zero bytes -> don't set up tail */ 2091 /* Use rest of head for padding and remaining ops: */
2093 if(!maxcount) 2092 resp->rqstp->rq_restailpage = 0;
2094 return 0; 2093 resp->xbuf->tail[0].iov_base = p;
2095
2096 /* set up page for remaining responses */
2097 svc_take_page(resp->rqstp);
2098 resp->xbuf->tail[0].iov_base =
2099 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2100 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2101 resp->xbuf->tail[0].iov_len = 0; 2094 resp->xbuf->tail[0].iov_len = 0;
2102 resp->p = resp->xbuf->tail[0].iov_base;
2103 resp->end = resp->p + PAGE_SIZE/4;
2104
2105 if (maxcount&3) { 2095 if (maxcount&3) {
2106 *(resp->p)++ = 0; 2096 RESERVE_SPACE(4);
2097 WRITE32(0);
2107 resp->xbuf->tail[0].iov_base += maxcount&3; 2098 resp->xbuf->tail[0].iov_base += maxcount&3;
2108 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2099 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2100 ADJUST_ARGS();
2109 } 2101 }
2110 return 0; 2102 return 0;
2111} 2103}
@@ -2142,21 +2134,20 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2142 2134
2143 WRITE32(maxcount); 2135 WRITE32(maxcount);
2144 ADJUST_ARGS(); 2136 ADJUST_ARGS();
2145 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2137 resp->xbuf->head[0].iov_len = (char*)p
2138 - (char*)resp->xbuf->head[0].iov_base;
2139 resp->xbuf->page_len = maxcount;
2146 2140
2147 svc_take_page(resp->rqstp); 2141 /* Use rest of head for padding and remaining ops: */
2148 resp->xbuf->tail[0].iov_base = 2142 resp->rqstp->rq_restailpage = 0;
2149 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2143 resp->xbuf->tail[0].iov_base = p;
2150 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2151 resp->xbuf->tail[0].iov_len = 0; 2144 resp->xbuf->tail[0].iov_len = 0;
2152 resp->p = resp->xbuf->tail[0].iov_base;
2153 resp->end = resp->p + PAGE_SIZE/4;
2154
2155 resp->xbuf->page_len = maxcount;
2156 if (maxcount&3) { 2145 if (maxcount&3) {
2157 *(resp->p)++ = 0; 2146 RESERVE_SPACE(4);
2147 WRITE32(0);
2158 resp->xbuf->tail[0].iov_base += maxcount&3; 2148 resp->xbuf->tail[0].iov_base += maxcount&3;
2159 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2149 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2150 ADJUST_ARGS();
2160 } 2151 }
2161 return 0; 2152 return 0;
2162} 2153}
@@ -2166,7 +2157,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2166{ 2157{
2167 int maxcount; 2158 int maxcount;
2168 loff_t offset; 2159 loff_t offset;
2169 u32 *page, *savep; 2160 u32 *page, *savep, *tailbase;
2170 ENCODE_HEAD; 2161 ENCODE_HEAD;
2171 2162
2172 if (nfserr) 2163 if (nfserr)
@@ -2182,6 +2173,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2182 WRITE32(0); 2173 WRITE32(0);
2183 ADJUST_ARGS(); 2174 ADJUST_ARGS();
2184 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2175 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
2176 tailbase = p;
2185 2177
2186 maxcount = PAGE_SIZE; 2178 maxcount = PAGE_SIZE;
2187 if (maxcount > readdir->rd_maxcount) 2179 if (maxcount > readdir->rd_maxcount)
@@ -2226,14 +2218,12 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2226 *p++ = htonl(readdir->common.err == nfserr_eof); 2218 *p++ = htonl(readdir->common.err == nfserr_eof);
2227 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2219 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2228 2220
2229 /* allocate a page for the tail */ 2221 /* Use rest of head for padding and remaining ops: */
2230 svc_take_page(resp->rqstp); 2222 resp->rqstp->rq_restailpage = 0;
2231 resp->xbuf->tail[0].iov_base = 2223 resp->xbuf->tail[0].iov_base = tailbase;
2232 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2233 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2234 resp->xbuf->tail[0].iov_len = 0; 2224 resp->xbuf->tail[0].iov_len = 0;
2235 resp->p = resp->xbuf->tail[0].iov_base; 2225 resp->p = resp->xbuf->tail[0].iov_base;
2236 resp->end = resp->p + PAGE_SIZE/4; 2226 resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
2237 2227
2238 return 0; 2228 return 0;
2239err_no_verf: 2229err_no_verf:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 3e6b75cd90fd..06cd0db0f32b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
553 PROC(none, void, void, none, RC_NOCACHE, ST), 553 PROC(none, void, void, none, RC_NOCACHE, ST),
554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE), 556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
557 PROC(none, void, void, none, RC_NOCACHE, ST), 557 PROC(none, void, void, none, RC_NOCACHE, ST),
558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 31018333dc38..6aa92d0e6876 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -371,7 +371,6 @@ out_nfserr:
371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
372{ 372{
373 ssize_t buflen; 373 ssize_t buflen;
374 int error;
375 374
376 buflen = vfs_getxattr(dentry, key, NULL, 0); 375 buflen = vfs_getxattr(dentry, key, NULL, 0);
377 if (buflen <= 0) 376 if (buflen <= 0)
@@ -381,10 +380,7 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
381 if (!*buf) 380 if (!*buf)
382 return -ENOMEM; 381 return -ENOMEM;
383 382
384 error = vfs_getxattr(dentry, key, *buf, buflen); 383 return vfs_getxattr(dentry, key, *buf, buflen);
385 if (error < 0)
386 return error;
387 return buflen;
388} 384}
389#endif 385#endif
390 386
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d06867..21f38accd039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
153struct o2hb_bio_wait_ctxt { 153struct o2hb_bio_wait_ctxt {
154 atomic_t wc_num_reqs; 154 atomic_t wc_num_reqs;
155 struct completion wc_io_complete; 155 struct completion wc_io_complete;
156 int wc_error;
156}; 157};
157 158
158static void o2hb_write_timeout(void *arg) 159static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
186{ 187{
187 atomic_set(&wc->wc_num_reqs, num_ios); 188 atomic_set(&wc->wc_num_reqs, num_ios);
188 init_completion(&wc->wc_io_complete); 189 init_completion(&wc->wc_io_complete);
190 wc->wc_error = 0;
189} 191}
190 192
191/* Used in error paths too */ 193/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
218{ 220{
219 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 221 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
220 222
221 if (error) 223 if (error) {
222 mlog(ML_ERROR, "IO Error %d\n", error); 224 mlog(ML_ERROR, "IO Error %d\n", error);
225 wc->wc_error = error;
226 }
223 227
224 if (bio->bi_size) 228 if (bio->bi_size)
225 return 1; 229 return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
390 394
391bail_and_wait: 395bail_and_wait:
392 o2hb_wait_on_io(reg, &wc); 396 o2hb_wait_on_io(reg, &wc);
397 if (wc.wc_error && !status)
398 status = wc.wc_error;
393 399
394 if (bios) { 400 if (bios) {
395 for(i = 0; i < num_bios; i++) 401 for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
790 return highest; 796 return highest;
791} 797}
792 798
793static void o2hb_do_disk_heartbeat(struct o2hb_region *reg) 799static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
794{ 800{
795 int i, ret, highest_node, change = 0; 801 int i, ret, highest_node, change = 0;
796 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 802 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
797 struct bio *write_bio; 803 struct bio *write_bio;
798 struct o2hb_bio_wait_ctxt write_wc; 804 struct o2hb_bio_wait_ctxt write_wc;
799 805
800 if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes))) 806 ret = o2nm_configured_node_map(configured_nodes,
801 return; 807 sizeof(configured_nodes));
808 if (ret) {
809 mlog_errno(ret);
810 return ret;
811 }
802 812
803 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 813 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
804 if (highest_node >= O2NM_MAX_NODES) { 814 if (highest_node >= O2NM_MAX_NODES) {
805 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 815 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
806 return; 816 return -EINVAL;
807 } 817 }
808 818
809 /* No sense in reading the slots of nodes that don't exist 819 /* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
813 ret = o2hb_read_slots(reg, highest_node + 1); 823 ret = o2hb_read_slots(reg, highest_node + 1);
814 if (ret < 0) { 824 if (ret < 0) {
815 mlog_errno(ret); 825 mlog_errno(ret);
816 return; 826 return ret;
817 } 827 }
818 828
819 /* With an up to date view of the slots, we can check that no 829 /* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
831 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 841 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
832 if (ret < 0) { 842 if (ret < 0) {
833 mlog_errno(ret); 843 mlog_errno(ret);
834 return; 844 return ret;
835 } 845 }
836 846
837 i = -1; 847 i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
847 */ 857 */
848 o2hb_wait_on_io(reg, &write_wc); 858 o2hb_wait_on_io(reg, &write_wc);
849 bio_put(write_bio); 859 bio_put(write_bio);
860 if (write_wc.wc_error) {
861 /* Do not re-arm the write timeout on I/O error - we
862 * can't be sure that the new block ever made it to
863 * disk */
864 mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
865 write_wc.wc_error, reg->hr_dev_name);
866 return write_wc.wc_error;
867 }
868
850 o2hb_arm_write_timeout(reg); 869 o2hb_arm_write_timeout(reg);
851 870
852 /* let the person who launched us know when things are steady */ 871 /* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
854 if (atomic_dec_and_test(&reg->hr_steady_iterations)) 873 if (atomic_dec_and_test(&reg->hr_steady_iterations))
855 wake_up(&o2hb_steady_queue); 874 wake_up(&o2hb_steady_queue);
856 } 875 }
876
877 return 0;
857} 878}
858 879
859/* Subtract b from a, storing the result in a. a *must* have a larger 880/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
913 * likely to time itself out. */ 934 * likely to time itself out. */
914 do_gettimeofday(&before_hb); 935 do_gettimeofday(&before_hb);
915 936
916 o2hb_do_disk_heartbeat(reg); 937 i = 0;
938 do {
939 ret = o2hb_do_disk_heartbeat(reg);
940 } while (ret && ++i < 2);
917 941
918 do_gettimeofday(&after_hb); 942 do_gettimeofday(&after_hb);
919 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 943 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744ee..74ca4e5f9765 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
139 return; 139 return;
140 } 140 }
141 141
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags);
145
142 /* we're downconverting. */ 146 /* we're downconverting. */
143 if (lockres->l_requested < lockres->l_level) { 147 if (lockres->l_requested < lockres->l_level) {
144 if (lockres->l_requested <= 148 if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
229 233
230 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
231 235
232 if (status != DLM_NORMAL) 236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
233 mlog(ML_ERROR, "Dlm returns status %d\n", status); 237 mlog(ML_ERROR, "Dlm returns status %d\n", status);
234 238
235 spin_lock(&lockres->l_lock); 239 spin_lock(&lockres->l_lock);
236 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) 240 /* The teardown flag gets set early during the unlock process,
241 * so test the cancel flag to make sure that this ast isn't
242 * for a concurrent cancel. */
243 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
237 lockres->l_level = LKM_IVMODE; 245 lockres->l_level = LKM_IVMODE;
238 else { 246 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */
252 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
253 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
254 goto out_noclear;
255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
239 lockres->l_requested = LKM_IVMODE; /* cancel an 260 lockres->l_requested = LKM_IVMODE; /* cancel an
240 * upconvert 261 * upconvert
241 * request. */ 262 * request. */
242 lockres->l_flags &= ~USER_LOCK_IN_CANCEL; 263 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
243 /* we want the unblock thread to look at it again 264 /* we want the unblock thread to look at it again
244 * now. */ 265 * now. */
245 __user_dlm_queue_lockres(lockres); 266 if (lockres->l_flags & USER_LOCK_BLOCKED)
267 __user_dlm_queue_lockres(lockres);
246 } 268 }
247 269
248 lockres->l_flags &= ~USER_LOCK_BUSY; 270 lockres->l_flags &= ~USER_LOCK_BUSY;
271out_noclear:
249 spin_unlock(&lockres->l_lock); 272 spin_unlock(&lockres->l_lock);
250 273
251 wake_up(&lockres->l_event); 274 wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
268 291
269 spin_lock(&lockres->l_lock); 292 spin_lock(&lockres->l_lock);
270 293
271 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); 294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
272 BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED)); 295 "Lockres %s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags);
273 297
274 /* notice that we don't clear USER_LOCK_BLOCKED here. That's 298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
275 * for user_ast to do. */ 299 * set, we want user_ast clear it. */
276 lockres->l_flags &= ~USER_LOCK_QUEUED; 300 lockres->l_flags &= ~USER_LOCK_QUEUED;
277 301
302 /* It's valid to get here and no longer be blocked - if we get
303 * several basts in a row, we might be queued by the first
304 * one, the unblock thread might run and clear the queued
305 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock);
311 goto drop_ref;
312 }
313
278 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
279 mlog(0, "lock is in teardown so we do nothing\n"); 315 mlog(0, "lock is in teardown so we do nothing\n");
280 spin_unlock(&lockres->l_lock); 316 spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
282 } 318 }
283 319
284 if (lockres->l_flags & USER_LOCK_BUSY) { 320 if (lockres->l_flags & USER_LOCK_BUSY) {
285 mlog(0, "BUSY flag detected...\n"); 321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
286 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
287 spin_unlock(&lockres->l_lock); 325 spin_unlock(&lockres->l_lock);
288 goto drop_ref; 326 goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
296 LKM_CANCEL, 334 LKM_CANCEL,
297 user_unlock_ast, 335 user_unlock_ast,
298 lockres); 336 lockres);
299 if (status == DLM_CANCELGRANT) { 337 if (status != DLM_NORMAL)
300 /* If we got this, then the ast was fired
301 * before we could cancel. We cleanup our
302 * state, and restart the function. */
303 spin_lock(&lockres->l_lock);
304 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
305 spin_unlock(&lockres->l_lock);
306 } else if (status != DLM_NORMAL)
307 user_log_dlm_error("dlmunlock", status, lockres); 338 user_log_dlm_error("dlmunlock", status, lockres);
308 goto drop_ref; 339 goto drop_ref;
309 } 340 }
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
581 mlog(0, "asked to destroy %s\n", lockres->l_name); 612 mlog(0, "asked to destroy %s\n", lockres->l_name);
582 613
583 spin_lock(&lockres->l_lock); 614 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock);
618 return 0;
619 }
620
621 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
622
584 while (lockres->l_flags & USER_LOCK_BUSY) { 623 while (lockres->l_flags & USER_LOCK_BUSY) {
585 spin_unlock(&lockres->l_lock); 624 spin_unlock(&lockres->l_lock);
586 625
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
606 645
607 lockres->l_flags &= ~USER_LOCK_ATTACHED; 646 lockres->l_flags &= ~USER_LOCK_ATTACHED;
608 lockres->l_flags |= USER_LOCK_BUSY; 647 lockres->l_flags |= USER_LOCK_BUSY;
609 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
610 spin_unlock(&lockres->l_lock); 648 spin_unlock(&lockres->l_lock);
611 649
612 mlog(0, "unlocking lockres %s\n", lockres->l_name); 650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46b..581eb451a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
260 if (new_i_size == le64_to_cpu(fe->i_size)) 260 if (new_i_size == le64_to_cpu(fe->i_size))
261 goto bail; 261 goto bail;
262 262
263 /* This forces other nodes to sync and drop their pages. Do
264 * this even if we have a truncate without allocation change -
265 * ocfs2 cluster sizes can be much greater than page size, so
266 * we have to truncate them anyway. */
267 status = ocfs2_data_lock(inode, 1);
268 if (status < 0) {
269 mlog_errno(status);
270 goto bail;
271 }
272 ocfs2_data_unlock(inode, 1);
273
263 if (le32_to_cpu(fe->i_clusters) == 274 if (le32_to_cpu(fe->i_clusters) ==
264 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { 275 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
265 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", 276 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
272 goto bail; 283 goto bail;
273 } 284 }
274 285
275 /* This forces other nodes to sync and drop their pages */
276 status = ocfs2_data_lock(inode, 1);
277 if (status < 0) {
278 mlog_errno(status);
279 goto bail;
280 }
281 ocfs2_data_unlock(inode, 1);
282
283 /* alright, we're going to need to do a full blown alloc size 286 /* alright, we're going to need to do a full blown alloc size
284 * change. Orphan the inode so that recovery can complete the 287 * change. Orphan the inode so that recovery can complete the
285 * truncate if necessary. This does the task of marking 288 * truncate if necessary. This does the task of marking
diff --git a/fs/pipe.c b/fs/pipe.c
index e2f4f1d9ffc2..e984beb93a0e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -15,6 +15,7 @@
15#include <linux/pipe_fs_i.h> 15#include <linux/pipe_fs_i.h>
16#include <linux/uio.h> 16#include <linux/uio.h>
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/pagemap.h>
18 19
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <asm/ioctls.h> 21#include <asm/ioctls.h>
@@ -35,7 +36,7 @@
35 */ 36 */
36 37
37/* Drop the inode semaphore and wait for a pipe event, atomically */ 38/* Drop the inode semaphore and wait for a pipe event, atomically */
38void pipe_wait(struct inode * inode) 39void pipe_wait(struct pipe_inode_info *pipe)
39{ 40{
40 DEFINE_WAIT(wait); 41 DEFINE_WAIT(wait);
41 42
@@ -43,11 +44,14 @@ void pipe_wait(struct inode * inode)
43 * Pipes are system-local resources, so sleeping on them 44 * Pipes are system-local resources, so sleeping on them
44 * is considered a noninteractive wait: 45 * is considered a noninteractive wait:
45 */ 46 */
46 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); 47 prepare_to_wait(&pipe->wait, &wait,
47 mutex_unlock(PIPE_MUTEX(*inode)); 48 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
49 if (pipe->inode)
50 mutex_unlock(&pipe->inode->i_mutex);
48 schedule(); 51 schedule();
49 finish_wait(PIPE_WAIT(*inode), &wait); 52 finish_wait(&pipe->wait, &wait);
50 mutex_lock(PIPE_MUTEX(*inode)); 53 if (pipe->inode)
54 mutex_lock(&pipe->inode->i_mutex);
51} 55}
52 56
53static int 57static int
@@ -90,32 +94,49 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
90 return 0; 94 return 0;
91} 95}
92 96
93static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) 97static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf)
94{ 99{
95 struct page *page = buf->page; 100 struct page *page = buf->page;
96 101
97 if (info->tmp_page) { 102 buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
98 __free_page(page); 103
99 return; 104 /*
100 } 105 * If nobody else uses this page, and we don't already have a
101 info->tmp_page = page; 106 * temporary page, let's keep track of it as a one-deep
107 * allocation cache. (Otherwise just release our reference to it)
108 */
109 if (page_count(page) == 1 && !pipe->tmp_page)
110 pipe->tmp_page = page;
111 else
112 page_cache_release(page);
102} 113}
103 114
104static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) 115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf)
105{ 117{
106 return kmap(buf->page); 118 return kmap(buf->page);
107} 119}
108 120
109static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) 121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf)
110{ 123{
111 kunmap(buf->page); 124 kunmap(buf->page);
112} 125}
113 126
127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
128 struct pipe_buffer *buf)
129{
130 buf->flags |= PIPE_BUF_FLAG_STOLEN;
131 return 0;
132}
133
114static struct pipe_buf_operations anon_pipe_buf_ops = { 134static struct pipe_buf_operations anon_pipe_buf_ops = {
115 .can_merge = 1, 135 .can_merge = 1,
116 .map = anon_pipe_buf_map, 136 .map = anon_pipe_buf_map,
117 .unmap = anon_pipe_buf_unmap, 137 .unmap = anon_pipe_buf_unmap,
118 .release = anon_pipe_buf_release, 138 .release = anon_pipe_buf_release,
139 .steal = anon_pipe_buf_steal,
119}; 140};
120 141
121static ssize_t 142static ssize_t
@@ -123,7 +144,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
123 unsigned long nr_segs, loff_t *ppos) 144 unsigned long nr_segs, loff_t *ppos)
124{ 145{
125 struct inode *inode = filp->f_dentry->d_inode; 146 struct inode *inode = filp->f_dentry->d_inode;
126 struct pipe_inode_info *info; 147 struct pipe_inode_info *pipe;
127 int do_wakeup; 148 int do_wakeup;
128 ssize_t ret; 149 ssize_t ret;
129 struct iovec *iov = (struct iovec *)_iov; 150 struct iovec *iov = (struct iovec *)_iov;
@@ -136,13 +157,13 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
136 157
137 do_wakeup = 0; 158 do_wakeup = 0;
138 ret = 0; 159 ret = 0;
139 mutex_lock(PIPE_MUTEX(*inode)); 160 mutex_lock(&inode->i_mutex);
140 info = inode->i_pipe; 161 pipe = inode->i_pipe;
141 for (;;) { 162 for (;;) {
142 int bufs = info->nrbufs; 163 int bufs = pipe->nrbufs;
143 if (bufs) { 164 if (bufs) {
144 int curbuf = info->curbuf; 165 int curbuf = pipe->curbuf;
145 struct pipe_buffer *buf = info->bufs + curbuf; 166 struct pipe_buffer *buf = pipe->bufs + curbuf;
146 struct pipe_buf_operations *ops = buf->ops; 167 struct pipe_buf_operations *ops = buf->ops;
147 void *addr; 168 void *addr;
148 size_t chars = buf->len; 169 size_t chars = buf->len;
@@ -151,11 +172,17 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
151 if (chars > total_len) 172 if (chars > total_len)
152 chars = total_len; 173 chars = total_len;
153 174
154 addr = ops->map(filp, info, buf); 175 addr = ops->map(filp, pipe, buf);
176 if (IS_ERR(addr)) {
177 if (!ret)
178 ret = PTR_ERR(addr);
179 break;
180 }
155 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 181 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
156 ops->unmap(info, buf); 182 ops->unmap(pipe, buf);
157 if (unlikely(error)) { 183 if (unlikely(error)) {
158 if (!ret) ret = -EFAULT; 184 if (!ret)
185 ret = -EFAULT;
159 break; 186 break;
160 } 187 }
161 ret += chars; 188 ret += chars;
@@ -163,10 +190,10 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
163 buf->len -= chars; 190 buf->len -= chars;
164 if (!buf->len) { 191 if (!buf->len) {
165 buf->ops = NULL; 192 buf->ops = NULL;
166 ops->release(info, buf); 193 ops->release(pipe, buf);
167 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 194 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
168 info->curbuf = curbuf; 195 pipe->curbuf = curbuf;
169 info->nrbufs = --bufs; 196 pipe->nrbufs = --bufs;
170 do_wakeup = 1; 197 do_wakeup = 1;
171 } 198 }
172 total_len -= chars; 199 total_len -= chars;
@@ -175,9 +202,9 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
175 } 202 }
176 if (bufs) /* More to do? */ 203 if (bufs) /* More to do? */
177 continue; 204 continue;
178 if (!PIPE_WRITERS(*inode)) 205 if (!pipe->writers)
179 break; 206 break;
180 if (!PIPE_WAITING_WRITERS(*inode)) { 207 if (!pipe->waiting_writers) {
181 /* syscall merging: Usually we must not sleep 208 /* syscall merging: Usually we must not sleep
182 * if O_NONBLOCK is set, or if we got some data. 209 * if O_NONBLOCK is set, or if we got some data.
183 * But if a writer sleeps in kernel space, then 210 * But if a writer sleeps in kernel space, then
@@ -191,20 +218,22 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
191 } 218 }
192 } 219 }
193 if (signal_pending(current)) { 220 if (signal_pending(current)) {
194 if (!ret) ret = -ERESTARTSYS; 221 if (!ret)
222 ret = -ERESTARTSYS;
195 break; 223 break;
196 } 224 }
197 if (do_wakeup) { 225 if (do_wakeup) {
198 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 226 wake_up_interruptible_sync(&pipe->wait);
199 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 227 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
200 } 228 }
201 pipe_wait(inode); 229 pipe_wait(pipe);
202 } 230 }
203 mutex_unlock(PIPE_MUTEX(*inode)); 231 mutex_unlock(&inode->i_mutex);
204 /* Signal writers asynchronously that there is more room. */ 232
233 /* Signal writers asynchronously that there is more room. */
205 if (do_wakeup) { 234 if (do_wakeup) {
206 wake_up_interruptible(PIPE_WAIT(*inode)); 235 wake_up_interruptible(&pipe->wait);
207 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 236 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
208 } 237 }
209 if (ret > 0) 238 if (ret > 0)
210 file_accessed(filp); 239 file_accessed(filp);
@@ -215,6 +244,7 @@ static ssize_t
215pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 244pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
216{ 245{
217 struct iovec iov = { .iov_base = buf, .iov_len = count }; 246 struct iovec iov = { .iov_base = buf, .iov_len = count };
247
218 return pipe_readv(filp, &iov, 1, ppos); 248 return pipe_readv(filp, &iov, 1, ppos);
219} 249}
220 250
@@ -223,7 +253,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
223 unsigned long nr_segs, loff_t *ppos) 253 unsigned long nr_segs, loff_t *ppos)
224{ 254{
225 struct inode *inode = filp->f_dentry->d_inode; 255 struct inode *inode = filp->f_dentry->d_inode;
226 struct pipe_inode_info *info; 256 struct pipe_inode_info *pipe;
227 ssize_t ret; 257 ssize_t ret;
228 int do_wakeup; 258 int do_wakeup;
229 struct iovec *iov = (struct iovec *)_iov; 259 struct iovec *iov = (struct iovec *)_iov;
@@ -237,10 +267,10 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
237 267
238 do_wakeup = 0; 268 do_wakeup = 0;
239 ret = 0; 269 ret = 0;
240 mutex_lock(PIPE_MUTEX(*inode)); 270 mutex_lock(&inode->i_mutex);
241 info = inode->i_pipe; 271 pipe = inode->i_pipe;
242 272
243 if (!PIPE_READERS(*inode)) { 273 if (!pipe->readers) {
244 send_sig(SIGPIPE, current, 0); 274 send_sig(SIGPIPE, current, 0);
245 ret = -EPIPE; 275 ret = -EPIPE;
246 goto out; 276 goto out;
@@ -248,15 +278,25 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
248 278
249 /* We try to merge small writes */ 279 /* We try to merge small writes */
250 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 280 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
251 if (info->nrbufs && chars != 0) { 281 if (pipe->nrbufs && chars != 0) {
252 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1); 282 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
253 struct pipe_buffer *buf = info->bufs + lastbuf; 283 (PIPE_BUFFERS-1);
284 struct pipe_buffer *buf = pipe->bufs + lastbuf;
254 struct pipe_buf_operations *ops = buf->ops; 285 struct pipe_buf_operations *ops = buf->ops;
255 int offset = buf->offset + buf->len; 286 int offset = buf->offset + buf->len;
287
256 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 288 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
257 void *addr = ops->map(filp, info, buf); 289 void *addr;
258 int error = pipe_iov_copy_from_user(offset + addr, iov, chars); 290 int error;
259 ops->unmap(info, buf); 291
292 addr = ops->map(filp, pipe, buf);
293 if (IS_ERR(addr)) {
294 error = PTR_ERR(addr);
295 goto out;
296 }
297 error = pipe_iov_copy_from_user(offset + addr, iov,
298 chars);
299 ops->unmap(pipe, buf);
260 ret = error; 300 ret = error;
261 do_wakeup = 1; 301 do_wakeup = 1;
262 if (error) 302 if (error)
@@ -271,16 +311,18 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
271 311
272 for (;;) { 312 for (;;) {
273 int bufs; 313 int bufs;
274 if (!PIPE_READERS(*inode)) { 314
315 if (!pipe->readers) {
275 send_sig(SIGPIPE, current, 0); 316 send_sig(SIGPIPE, current, 0);
276 if (!ret) ret = -EPIPE; 317 if (!ret)
318 ret = -EPIPE;
277 break; 319 break;
278 } 320 }
279 bufs = info->nrbufs; 321 bufs = pipe->nrbufs;
280 if (bufs < PIPE_BUFFERS) { 322 if (bufs < PIPE_BUFFERS) {
281 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1); 323 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
282 struct pipe_buffer *buf = info->bufs + newbuf; 324 struct pipe_buffer *buf = pipe->bufs + newbuf;
283 struct page *page = info->tmp_page; 325 struct page *page = pipe->tmp_page;
284 int error; 326 int error;
285 327
286 if (!page) { 328 if (!page) {
@@ -289,9 +331,9 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
289 ret = ret ? : -ENOMEM; 331 ret = ret ? : -ENOMEM;
290 break; 332 break;
291 } 333 }
292 info->tmp_page = page; 334 pipe->tmp_page = page;
293 } 335 }
294 /* Always wakeup, even if the copy fails. Otherwise 336 /* Always wake up, even if the copy fails. Otherwise
295 * we lock up (O_NONBLOCK-)readers that sleep due to 337 * we lock up (O_NONBLOCK-)readers that sleep due to
296 * syscall merging. 338 * syscall merging.
297 * FIXME! Is this really true? 339 * FIXME! Is this really true?
@@ -304,7 +346,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
304 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 346 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
305 kunmap(page); 347 kunmap(page);
306 if (unlikely(error)) { 348 if (unlikely(error)) {
307 if (!ret) ret = -EFAULT; 349 if (!ret)
350 ret = -EFAULT;
308 break; 351 break;
309 } 352 }
310 ret += chars; 353 ret += chars;
@@ -314,8 +357,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
314 buf->ops = &anon_pipe_buf_ops; 357 buf->ops = &anon_pipe_buf_ops;
315 buf->offset = 0; 358 buf->offset = 0;
316 buf->len = chars; 359 buf->len = chars;
317 info->nrbufs = ++bufs; 360 pipe->nrbufs = ++bufs;
318 info->tmp_page = NULL; 361 pipe->tmp_page = NULL;
319 362
320 total_len -= chars; 363 total_len -= chars;
321 if (!total_len) 364 if (!total_len)
@@ -324,27 +367,29 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
324 if (bufs < PIPE_BUFFERS) 367 if (bufs < PIPE_BUFFERS)
325 continue; 368 continue;
326 if (filp->f_flags & O_NONBLOCK) { 369 if (filp->f_flags & O_NONBLOCK) {
327 if (!ret) ret = -EAGAIN; 370 if (!ret)
371 ret = -EAGAIN;
328 break; 372 break;
329 } 373 }
330 if (signal_pending(current)) { 374 if (signal_pending(current)) {
331 if (!ret) ret = -ERESTARTSYS; 375 if (!ret)
376 ret = -ERESTARTSYS;
332 break; 377 break;
333 } 378 }
334 if (do_wakeup) { 379 if (do_wakeup) {
335 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 380 wake_up_interruptible_sync(&pipe->wait);
336 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 381 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
337 do_wakeup = 0; 382 do_wakeup = 0;
338 } 383 }
339 PIPE_WAITING_WRITERS(*inode)++; 384 pipe->waiting_writers++;
340 pipe_wait(inode); 385 pipe_wait(pipe);
341 PIPE_WAITING_WRITERS(*inode)--; 386 pipe->waiting_writers--;
342 } 387 }
343out: 388out:
344 mutex_unlock(PIPE_MUTEX(*inode)); 389 mutex_unlock(&inode->i_mutex);
345 if (do_wakeup) { 390 if (do_wakeup) {
346 wake_up_interruptible(PIPE_WAIT(*inode)); 391 wake_up_interruptible(&pipe->wait);
347 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 392 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
348 } 393 }
349 if (ret > 0) 394 if (ret > 0)
350 file_update_time(filp); 395 file_update_time(filp);
@@ -356,6 +401,7 @@ pipe_write(struct file *filp, const char __user *buf,
356 size_t count, loff_t *ppos) 401 size_t count, loff_t *ppos)
357{ 402{
358 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 403 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
404
359 return pipe_writev(filp, &iov, 1, ppos); 405 return pipe_writev(filp, &iov, 1, ppos);
360} 406}
361 407
@@ -366,7 +412,8 @@ bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
366} 412}
367 413
368static ssize_t 414static ssize_t
369bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) 415bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
416 loff_t *ppos)
370{ 417{
371 return -EBADF; 418 return -EBADF;
372} 419}
@@ -376,21 +423,22 @@ pipe_ioctl(struct inode *pino, struct file *filp,
376 unsigned int cmd, unsigned long arg) 423 unsigned int cmd, unsigned long arg)
377{ 424{
378 struct inode *inode = filp->f_dentry->d_inode; 425 struct inode *inode = filp->f_dentry->d_inode;
379 struct pipe_inode_info *info; 426 struct pipe_inode_info *pipe;
380 int count, buf, nrbufs; 427 int count, buf, nrbufs;
381 428
382 switch (cmd) { 429 switch (cmd) {
383 case FIONREAD: 430 case FIONREAD:
384 mutex_lock(PIPE_MUTEX(*inode)); 431 mutex_lock(&inode->i_mutex);
385 info = inode->i_pipe; 432 pipe = inode->i_pipe;
386 count = 0; 433 count = 0;
387 buf = info->curbuf; 434 buf = pipe->curbuf;
388 nrbufs = info->nrbufs; 435 nrbufs = pipe->nrbufs;
389 while (--nrbufs >= 0) { 436 while (--nrbufs >= 0) {
390 count += info->bufs[buf].len; 437 count += pipe->bufs[buf].len;
391 buf = (buf+1) & (PIPE_BUFFERS-1); 438 buf = (buf+1) & (PIPE_BUFFERS-1);
392 } 439 }
393 mutex_unlock(PIPE_MUTEX(*inode)); 440 mutex_unlock(&inode->i_mutex);
441
394 return put_user(count, (int __user *)arg); 442 return put_user(count, (int __user *)arg);
395 default: 443 default:
396 return -EINVAL; 444 return -EINVAL;
@@ -403,17 +451,17 @@ pipe_poll(struct file *filp, poll_table *wait)
403{ 451{
404 unsigned int mask; 452 unsigned int mask;
405 struct inode *inode = filp->f_dentry->d_inode; 453 struct inode *inode = filp->f_dentry->d_inode;
406 struct pipe_inode_info *info = inode->i_pipe; 454 struct pipe_inode_info *pipe = inode->i_pipe;
407 int nrbufs; 455 int nrbufs;
408 456
409 poll_wait(filp, PIPE_WAIT(*inode), wait); 457 poll_wait(filp, &pipe->wait, wait);
410 458
411 /* Reading only -- no need for acquiring the semaphore. */ 459 /* Reading only -- no need for acquiring the semaphore. */
412 nrbufs = info->nrbufs; 460 nrbufs = pipe->nrbufs;
413 mask = 0; 461 mask = 0;
414 if (filp->f_mode & FMODE_READ) { 462 if (filp->f_mode & FMODE_READ) {
415 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 463 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
416 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) 464 if (!pipe->writers && filp->f_version != pipe->w_counter)
417 mask |= POLLHUP; 465 mask |= POLLHUP;
418 } 466 }
419 467
@@ -423,7 +471,7 @@ pipe_poll(struct file *filp, poll_table *wait)
423 * Most Unices do not set POLLERR for FIFOs but on Linux they 471 * Most Unices do not set POLLERR for FIFOs but on Linux they
424 * behave exactly like pipes for poll(). 472 * behave exactly like pipes for poll().
425 */ 473 */
426 if (!PIPE_READERS(*inode)) 474 if (!pipe->readers)
427 mask |= POLLERR; 475 mask |= POLLERR;
428 } 476 }
429 477
@@ -433,17 +481,21 @@ pipe_poll(struct file *filp, poll_table *wait)
433static int 481static int
434pipe_release(struct inode *inode, int decr, int decw) 482pipe_release(struct inode *inode, int decr, int decw)
435{ 483{
436 mutex_lock(PIPE_MUTEX(*inode)); 484 struct pipe_inode_info *pipe;
437 PIPE_READERS(*inode) -= decr; 485
438 PIPE_WRITERS(*inode) -= decw; 486 mutex_lock(&inode->i_mutex);
439 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { 487 pipe = inode->i_pipe;
488 pipe->readers -= decr;
489 pipe->writers -= decw;
490
491 if (!pipe->readers && !pipe->writers) {
440 free_pipe_info(inode); 492 free_pipe_info(inode);
441 } else { 493 } else {
442 wake_up_interruptible(PIPE_WAIT(*inode)); 494 wake_up_interruptible(&pipe->wait);
443 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 495 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
444 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 496 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
445 } 497 }
446 mutex_unlock(PIPE_MUTEX(*inode)); 498 mutex_unlock(&inode->i_mutex);
447 499
448 return 0; 500 return 0;
449} 501}
@@ -454,9 +506,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
454 struct inode *inode = filp->f_dentry->d_inode; 506 struct inode *inode = filp->f_dentry->d_inode;
455 int retval; 507 int retval;
456 508
457 mutex_lock(PIPE_MUTEX(*inode)); 509 mutex_lock(&inode->i_mutex);
458 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 510 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
459 mutex_unlock(PIPE_MUTEX(*inode)); 511 mutex_unlock(&inode->i_mutex);
460 512
461 if (retval < 0) 513 if (retval < 0)
462 return retval; 514 return retval;
@@ -471,9 +523,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
471 struct inode *inode = filp->f_dentry->d_inode; 523 struct inode *inode = filp->f_dentry->d_inode;
472 int retval; 524 int retval;
473 525
474 mutex_lock(PIPE_MUTEX(*inode)); 526 mutex_lock(&inode->i_mutex);
475 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 527 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
476 mutex_unlock(PIPE_MUTEX(*inode)); 528 mutex_unlock(&inode->i_mutex);
477 529
478 if (retval < 0) 530 if (retval < 0)
479 return retval; 531 return retval;
@@ -486,16 +538,17 @@ static int
486pipe_rdwr_fasync(int fd, struct file *filp, int on) 538pipe_rdwr_fasync(int fd, struct file *filp, int on)
487{ 539{
488 struct inode *inode = filp->f_dentry->d_inode; 540 struct inode *inode = filp->f_dentry->d_inode;
541 struct pipe_inode_info *pipe = inode->i_pipe;
489 int retval; 542 int retval;
490 543
491 mutex_lock(PIPE_MUTEX(*inode)); 544 mutex_lock(&inode->i_mutex);
492 545
493 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 546 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
494 547
495 if (retval >= 0) 548 if (retval >= 0)
496 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 549 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
497 550
498 mutex_unlock(PIPE_MUTEX(*inode)); 551 mutex_unlock(&inode->i_mutex);
499 552
500 if (retval < 0) 553 if (retval < 0)
501 return retval; 554 return retval;
@@ -534,9 +587,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
534{ 587{
535 /* We could have perhaps used atomic_t, but this and friends 588 /* We could have perhaps used atomic_t, but this and friends
536 below are the only places. So it doesn't seem worthwhile. */ 589 below are the only places. So it doesn't seem worthwhile. */
537 mutex_lock(PIPE_MUTEX(*inode)); 590 mutex_lock(&inode->i_mutex);
538 PIPE_READERS(*inode)++; 591 inode->i_pipe->readers++;
539 mutex_unlock(PIPE_MUTEX(*inode)); 592 mutex_unlock(&inode->i_mutex);
540 593
541 return 0; 594 return 0;
542} 595}
@@ -544,9 +597,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
544static int 597static int
545pipe_write_open(struct inode *inode, struct file *filp) 598pipe_write_open(struct inode *inode, struct file *filp)
546{ 599{
547 mutex_lock(PIPE_MUTEX(*inode)); 600 mutex_lock(&inode->i_mutex);
548 PIPE_WRITERS(*inode)++; 601 inode->i_pipe->writers++;
549 mutex_unlock(PIPE_MUTEX(*inode)); 602 mutex_unlock(&inode->i_mutex);
550 603
551 return 0; 604 return 0;
552} 605}
@@ -554,12 +607,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
554static int 607static int
555pipe_rdwr_open(struct inode *inode, struct file *filp) 608pipe_rdwr_open(struct inode *inode, struct file *filp)
556{ 609{
557 mutex_lock(PIPE_MUTEX(*inode)); 610 mutex_lock(&inode->i_mutex);
558 if (filp->f_mode & FMODE_READ) 611 if (filp->f_mode & FMODE_READ)
559 PIPE_READERS(*inode)++; 612 inode->i_pipe->readers++;
560 if (filp->f_mode & FMODE_WRITE) 613 if (filp->f_mode & FMODE_WRITE)
561 PIPE_WRITERS(*inode)++; 614 inode->i_pipe->writers++;
562 mutex_unlock(PIPE_MUTEX(*inode)); 615 mutex_unlock(&inode->i_mutex);
563 616
564 return 0; 617 return 0;
565} 618}
@@ -642,37 +695,38 @@ static struct file_operations rdwr_pipe_fops = {
642 .fasync = pipe_rdwr_fasync, 695 .fasync = pipe_rdwr_fasync,
643}; 696};
644 697
645void free_pipe_info(struct inode *inode) 698struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
699{
700 struct pipe_inode_info *pipe;
701
702 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
703 if (pipe) {
704 init_waitqueue_head(&pipe->wait);
705 pipe->r_counter = pipe->w_counter = 1;
706 pipe->inode = inode;
707 }
708
709 return pipe;
710}
711
712void __free_pipe_info(struct pipe_inode_info *pipe)
646{ 713{
647 int i; 714 int i;
648 struct pipe_inode_info *info = inode->i_pipe;
649 715
650 inode->i_pipe = NULL;
651 for (i = 0; i < PIPE_BUFFERS; i++) { 716 for (i = 0; i < PIPE_BUFFERS; i++) {
652 struct pipe_buffer *buf = info->bufs + i; 717 struct pipe_buffer *buf = pipe->bufs + i;
653 if (buf->ops) 718 if (buf->ops)
654 buf->ops->release(info, buf); 719 buf->ops->release(pipe, buf);
655 } 720 }
656 if (info->tmp_page) 721 if (pipe->tmp_page)
657 __free_page(info->tmp_page); 722 __free_page(pipe->tmp_page);
658 kfree(info); 723 kfree(pipe);
659} 724}
660 725
661struct inode* pipe_new(struct inode* inode) 726void free_pipe_info(struct inode *inode)
662{ 727{
663 struct pipe_inode_info *info; 728 __free_pipe_info(inode->i_pipe);
664 729 inode->i_pipe = NULL;
665 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
666 if (!info)
667 goto fail_page;
668 inode->i_pipe = info;
669
670 init_waitqueue_head(PIPE_WAIT(*inode));
671 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
672
673 return inode;
674fail_page:
675 return NULL;
676} 730}
677 731
678static struct vfsmount *pipe_mnt __read_mostly; 732static struct vfsmount *pipe_mnt __read_mostly;
@@ -680,6 +734,7 @@ static int pipefs_delete_dentry(struct dentry *dentry)
680{ 734{
681 return 1; 735 return 1;
682} 736}
737
683static struct dentry_operations pipefs_dentry_operations = { 738static struct dentry_operations pipefs_dentry_operations = {
684 .d_delete = pipefs_delete_dentry, 739 .d_delete = pipefs_delete_dentry,
685}; 740};
@@ -687,13 +742,17 @@ static struct dentry_operations pipefs_dentry_operations = {
687static struct inode * get_pipe_inode(void) 742static struct inode * get_pipe_inode(void)
688{ 743{
689 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 744 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
745 struct pipe_inode_info *pipe;
690 746
691 if (!inode) 747 if (!inode)
692 goto fail_inode; 748 goto fail_inode;
693 749
694 if(!pipe_new(inode)) 750 pipe = alloc_pipe_info(inode);
751 if (!pipe)
695 goto fail_iput; 752 goto fail_iput;
696 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; 753 inode->i_pipe = pipe;
754
755 pipe->readers = pipe->writers = 1;
697 inode->i_fop = &rdwr_pipe_fops; 756 inode->i_fop = &rdwr_pipe_fops;
698 757
699 /* 758 /*
@@ -708,10 +767,12 @@ static struct inode * get_pipe_inode(void)
708 inode->i_gid = current->fsgid; 767 inode->i_gid = current->fsgid;
709 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 768 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
710 inode->i_blksize = PAGE_SIZE; 769 inode->i_blksize = PAGE_SIZE;
770
711 return inode; 771 return inode;
712 772
713fail_iput: 773fail_iput:
714 iput(inode); 774 iput(inode);
775
715fail_inode: 776fail_inode:
716 return NULL; 777 return NULL;
717} 778}
@@ -724,7 +785,7 @@ int do_pipe(int *fd)
724 struct inode * inode; 785 struct inode * inode;
725 struct file *f1, *f2; 786 struct file *f1, *f2;
726 int error; 787 int error;
727 int i,j; 788 int i, j;
728 789
729 error = -ENFILE; 790 error = -ENFILE;
730 f1 = get_empty_filp(); 791 f1 = get_empty_filp();
@@ -757,6 +818,7 @@ int do_pipe(int *fd)
757 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); 818 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
758 if (!dentry) 819 if (!dentry)
759 goto close_f12_inode_i_j; 820 goto close_f12_inode_i_j;
821
760 dentry->d_op = &pipefs_dentry_operations; 822 dentry->d_op = &pipefs_dentry_operations;
761 d_add(dentry, inode); 823 d_add(dentry, inode);
762 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); 824 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
@@ -780,6 +842,7 @@ int do_pipe(int *fd)
780 fd_install(j, f2); 842 fd_install(j, f2);
781 fd[0] = i; 843 fd[0] = i;
782 fd[1] = j; 844 fd[1] = j;
845
783 return 0; 846 return 0;
784 847
785close_f12_inode_i_j: 848close_f12_inode_i_j:
@@ -804,8 +867,9 @@ no_files:
804 * d_name - pipe: will go nicely and kill the special-casing in procfs. 867 * d_name - pipe: will go nicely and kill the special-casing in procfs.
805 */ 868 */
806 869
807static struct super_block *pipefs_get_sb(struct file_system_type *fs_type, 870static struct super_block *
808 int flags, const char *dev_name, void *data) 871pipefs_get_sb(struct file_system_type *fs_type, int flags,
872 const char *dev_name, void *data)
809{ 873{
810 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 874 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
811} 875}
@@ -819,6 +883,7 @@ static struct file_system_type pipe_fs_type = {
819static int __init init_pipe_fs(void) 883static int __init init_pipe_fs(void)
820{ 884{
821 int err = register_filesystem(&pipe_fs_type); 885 int err = register_filesystem(&pipe_fs_type);
886
822 if (!err) { 887 if (!err) {
823 pipe_mnt = kern_mount(&pipe_fs_type); 888 pipe_mnt = kern_mount(&pipe_fs_type);
824 if (IS_ERR(pipe_mnt)) { 889 if (IS_ERR(pipe_mnt)) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8f1f49ceebec..a3a3eecef689 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -534,12 +534,15 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
534 534
535/* If the process being read is separated by chroot from the reading process, 535/* If the process being read is separated by chroot from the reading process,
536 * don't let the reader access the threads. 536 * don't let the reader access the threads.
537 *
538 * note: this does dput(root) and mntput(vfsmnt) on exit.
537 */ 539 */
538static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) 540static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
539{ 541{
540 struct dentry *de, *base; 542 struct dentry *de, *base;
541 struct vfsmount *our_vfsmnt, *mnt; 543 struct vfsmount *our_vfsmnt, *mnt;
542 int res = 0; 544 int res = 0;
545
543 read_lock(&current->fs->lock); 546 read_lock(&current->fs->lock);
544 our_vfsmnt = mntget(current->fs->rootmnt); 547 our_vfsmnt = mntget(current->fs->rootmnt);
545 base = dget(current->fs->root); 548 base = dget(current->fs->root);
@@ -549,11 +552,11 @@ static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
549 de = root; 552 de = root;
550 mnt = vfsmnt; 553 mnt = vfsmnt;
551 554
552 while (vfsmnt != our_vfsmnt) { 555 while (mnt != our_vfsmnt) {
553 if (vfsmnt == vfsmnt->mnt_parent) 556 if (mnt == mnt->mnt_parent)
554 goto out; 557 goto out;
555 de = vfsmnt->mnt_mountpoint; 558 de = mnt->mnt_mountpoint;
556 vfsmnt = vfsmnt->mnt_parent; 559 mnt = mnt->mnt_parent;
557 } 560 }
558 561
559 if (!is_subdir(de, base)) 562 if (!is_subdir(de, base))
@@ -564,7 +567,7 @@ exit:
564 dput(base); 567 dput(base);
565 mntput(our_vfsmnt); 568 mntput(our_vfsmnt);
566 dput(root); 569 dput(root);
567 mntput(mnt); 570 mntput(vfsmnt);
568 return res; 571 return res;
569out: 572out:
570 spin_unlock(&vfsmount_lock); 573 spin_unlock(&vfsmount_lock);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ef5a3323f4b5..5c10ea157425 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -249,144 +249,60 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
249 return seq_open(file, &cpuinfo_op); 249 return seq_open(file, &cpuinfo_op);
250} 250}
251 251
252enum devinfo_states { 252static struct file_operations proc_cpuinfo_operations = {
253 CHR_HDR, 253 .open = cpuinfo_open,
254 CHR_LIST, 254 .read = seq_read,
255 BLK_HDR, 255 .llseek = seq_lseek,
256 BLK_LIST, 256 .release = seq_release,
257 DEVINFO_DONE
258};
259
260struct devinfo_state {
261 void *chrdev;
262 void *blkdev;
263 unsigned int num_records;
264 unsigned int cur_record;
265 enum devinfo_states state;
266}; 257};
267 258
268static void *devinfo_start(struct seq_file *f, loff_t *pos) 259static int devinfo_show(struct seq_file *f, void *v)
269{ 260{
270 struct devinfo_state *info = f->private; 261 int i = *(loff_t *) v;
271 262
272 if (*pos) { 263 if (i < CHRDEV_MAJOR_HASH_SIZE) {
273 if ((info) && (*pos <= info->num_records)) 264 if (i == 0)
274 return info; 265 seq_printf(f, "Character devices:\n");
275 return NULL; 266 chrdev_show(f, i);
267 } else {
268 i -= CHRDEV_MAJOR_HASH_SIZE;
269 if (i == 0)
270 seq_printf(f, "\nBlock devices:\n");
271 blkdev_show(f, i);
276 } 272 }
277 info = kmalloc(sizeof(*info), GFP_KERNEL); 273 return 0;
278 f->private = info;
279 info->chrdev = acquire_chrdev_list();
280 info->blkdev = acquire_blkdev_list();
281 info->state = CHR_HDR;
282 info->num_records = count_chrdev_list();
283 info->num_records += count_blkdev_list();
284 info->num_records += 2; /* Character and Block headers */
285 *pos = 1;
286 info->cur_record = *pos;
287 return info;
288} 274}
289 275
290static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) 276static void *devinfo_start(struct seq_file *f, loff_t *pos)
291{ 277{
292 int idummy; 278 if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
293 char *ndummy; 279 return pos;
294 struct devinfo_state *info = f->private; 280 return NULL;
295
296 switch (info->state) {
297 case CHR_HDR:
298 info->state = CHR_LIST;
299 (*pos)++;
300 /*fallthrough*/
301 case CHR_LIST:
302 if (get_chrdev_info(info->chrdev,&idummy,&ndummy)) {
303 /*
304 * The character dev list is complete
305 */
306 info->state = BLK_HDR;
307 } else {
308 info->chrdev = get_next_chrdev(info->chrdev);
309 }
310 (*pos)++;
311 break;
312 case BLK_HDR:
313 info->state = BLK_LIST;
314 (*pos)++;
315 /*fallthrough*/
316 case BLK_LIST:
317 if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) {
318 /*
319 * The block dev list is complete
320 */
321 info->state = DEVINFO_DONE;
322 } else {
323 info->blkdev = get_next_blkdev(info->blkdev);
324 }
325 (*pos)++;
326 break;
327 case DEVINFO_DONE:
328 (*pos)++;
329 info->cur_record = *pos;
330 info = NULL;
331 break;
332 default:
333 break;
334 }
335 if (info)
336 info->cur_record = *pos;
337 return info;
338} 281}
339 282
340static void devinfo_stop(struct seq_file *f, void *v) 283static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
341{ 284{
342 struct devinfo_state *info = f->private; 285 (*pos)++;
343 286 if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
344 if (info) { 287 return NULL;
345 release_chrdev_list(info->chrdev); 288 return pos;
346 release_blkdev_list(info->blkdev);
347 f->private = NULL;
348 kfree(info);
349 }
350} 289}
351 290
352static int devinfo_show(struct seq_file *f, void *arg) 291static void devinfo_stop(struct seq_file *f, void *v)
353{ 292{
354 int major; 293 /* Nothing to do */
355 char *name;
356 struct devinfo_state *info = f->private;
357
358 switch(info->state) {
359 case CHR_HDR:
360 seq_printf(f,"Character devices:\n");
361 /* fallthrough */
362 case CHR_LIST:
363 if (!get_chrdev_info(info->chrdev,&major,&name))
364 seq_printf(f,"%3d %s\n",major,name);
365 break;
366 case BLK_HDR:
367 seq_printf(f,"\nBlock devices:\n");
368 /* fallthrough */
369 case BLK_LIST:
370 if (!get_blkdev_info(info->blkdev,&major,&name))
371 seq_printf(f,"%3d %s\n",major,name);
372 break;
373 default:
374 break;
375 }
376
377 return 0;
378} 294}
379 295
380static struct seq_operations devinfo_op = { 296static struct seq_operations devinfo_ops = {
381 .start = devinfo_start, 297 .start = devinfo_start,
382 .next = devinfo_next, 298 .next = devinfo_next,
383 .stop = devinfo_stop, 299 .stop = devinfo_stop,
384 .show = devinfo_show, 300 .show = devinfo_show
385}; 301};
386 302
387static int devinfo_open(struct inode *inode, struct file *file) 303static int devinfo_open(struct inode *inode, struct file *filp)
388{ 304{
389 return seq_open(file, &devinfo_op); 305 return seq_open(filp, &devinfo_ops);
390} 306}
391 307
392static struct file_operations proc_devinfo_operations = { 308static struct file_operations proc_devinfo_operations = {
@@ -396,13 +312,6 @@ static struct file_operations proc_devinfo_operations = {
396 .release = seq_release, 312 .release = seq_release,
397}; 313};
398 314
399static struct file_operations proc_cpuinfo_operations = {
400 .open = cpuinfo_open,
401 .read = seq_read,
402 .llseek = seq_lseek,
403 .release = seq_release,
404};
405
406extern struct seq_operations vmstat_op; 315extern struct seq_operations vmstat_op;
407static int vmstat_open(struct inode *inode, struct file *file) 316static int vmstat_open(struct inode *inode, struct file *file)
408{ 317{
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7efa73d44c9a..20d4b2237fce 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
103 size_t buflen, loff_t *fpos) 103 size_t buflen, loff_t *fpos)
104{ 104{
105 ssize_t acc = 0, tmp; 105 ssize_t acc = 0, tmp;
106 size_t tsz, nr_bytes; 106 size_t tsz;
107 u64 start; 107 u64 start, nr_bytes;
108 struct vmcore *curr_m = NULL; 108 struct vmcore *curr_m = NULL;
109 109
110 if (buflen == 0 || *fpos >= vmcore_size) 110 if (buflen == 0 || *fpos >= vmcore_size)
diff --git a/fs/read_write.c b/fs/read_write.c
index 6256ca81a718..5bc0e9234f9d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
202 goto Einval; 202 goto Einval;
203 203
204 inode = file->f_dentry->d_inode; 204 inode = file->f_dentry->d_inode;
205 if (inode->i_flock && MANDATORY_LOCK(inode)) { 205 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
206 int retval = locks_mandatory_area( 206 int retval = locks_mandatory_area(
207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
208 inode, file, pos, count); 208 inode, file, pos, count);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 010094d14da6..cf6e1cf40351 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1576,6 +1576,8 @@ const struct file_operations reiserfs_file_operations = {
1576 .sendfile = generic_file_sendfile, 1576 .sendfile = generic_file_sendfile,
1577 .aio_read = generic_file_aio_read, 1577 .aio_read = generic_file_aio_read,
1578 .aio_write = reiserfs_aio_write, 1578 .aio_write = reiserfs_aio_write,
1579 .splice_read = generic_file_splice_read,
1580 .splice_write = generic_file_splice_write,
1579}; 1581};
1580 1582
1581struct inode_operations reiserfs_file_inode_operations = { 1583struct inode_operations reiserfs_file_inode_operations = {
diff --git a/fs/select.c b/fs/select.c
index b3a3a1326af6..a8109baa5e46 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -310,11 +310,12 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
310 fd_set __user *exp, s64 *timeout) 310 fd_set __user *exp, s64 *timeout)
311{ 311{
312 fd_set_bits fds; 312 fd_set_bits fds;
313 char *bits; 313 void *bits;
314 int ret, size, max_fdset; 314 int ret, max_fdset;
315 unsigned int size;
315 struct fdtable *fdt; 316 struct fdtable *fdt;
316 /* Allocate small arguments on the stack to save memory and be faster */ 317 /* Allocate small arguments on the stack to save memory and be faster */
317 char stack_fds[SELECT_STACK_ALLOC]; 318 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
318 319
319 ret = -EINVAL; 320 ret = -EINVAL;
320 if (n < 0) 321 if (n < 0)
@@ -333,20 +334,21 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
333 * since we used fdset we need to allocate memory in units of 334 * since we used fdset we need to allocate memory in units of
334 * long-words. 335 * long-words.
335 */ 336 */
336 ret = -ENOMEM;
337 size = FDS_BYTES(n); 337 size = FDS_BYTES(n);
338 if (6*size < SELECT_STACK_ALLOC) 338 bits = stack_fds;
339 bits = stack_fds; 339 if (size > sizeof(stack_fds) / 6) {
340 else 340 /* Not enough space in on-stack array; must use kmalloc */
341 ret = -ENOMEM;
341 bits = kmalloc(6 * size, GFP_KERNEL); 342 bits = kmalloc(6 * size, GFP_KERNEL);
342 if (!bits) 343 if (!bits)
343 goto out_nofds; 344 goto out_nofds;
344 fds.in = (unsigned long *) bits; 345 }
345 fds.out = (unsigned long *) (bits + size); 346 fds.in = bits;
346 fds.ex = (unsigned long *) (bits + 2*size); 347 fds.out = bits + size;
347 fds.res_in = (unsigned long *) (bits + 3*size); 348 fds.ex = bits + 2*size;
348 fds.res_out = (unsigned long *) (bits + 4*size); 349 fds.res_in = bits + 3*size;
349 fds.res_ex = (unsigned long *) (bits + 5*size); 350 fds.res_out = bits + 4*size;
351 fds.res_ex = bits + 5*size;
350 352
351 if ((ret = get_fd_set(n, inp, fds.in)) || 353 if ((ret = get_fd_set(n, inp, fds.in)) ||
352 (ret = get_fd_set(n, outp, fds.out)) || 354 (ret = get_fd_set(n, outp, fds.out)) ||
@@ -639,8 +641,10 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
639 struct poll_list *walk; 641 struct poll_list *walk;
640 struct fdtable *fdt; 642 struct fdtable *fdt;
641 int max_fdset; 643 int max_fdset;
642 /* Allocate small arguments on the stack to save memory and be faster */ 644 /* Allocate small arguments on the stack to save memory and be
643 char stack_pps[POLL_STACK_ALLOC]; 645 faster - use long to make sure the buffer is aligned properly
646 on 64 bit archs to avoid unaligned access */
647 long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
644 struct poll_list *stack_pp = NULL; 648 struct poll_list *stack_pp = NULL;
645 649
646 /* Do a sanity check on nfds ... */ 650 /* Do a sanity check on nfds ... */
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index c56bd99a9701..ed9a24d19d7d 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -178,11 +178,9 @@ smb_writepage(struct page *page, struct writeback_control *wbc)
178 unsigned offset = PAGE_CACHE_SIZE; 178 unsigned offset = PAGE_CACHE_SIZE;
179 int err; 179 int err;
180 180
181 if (!mapping) 181 BUG_ON(!mapping);
182 BUG();
183 inode = mapping->host; 182 inode = mapping->host;
184 if (!inode) 183 BUG_ON(!inode);
185 BUG();
186 184
187 end_index = inode->i_size >> PAGE_CACHE_SHIFT; 185 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
188 186
diff --git a/fs/splice.c b/fs/splice.c
new file mode 100644
index 000000000000..e50a460239dd
--- /dev/null
+++ b/fs/splice.c
@@ -0,0 +1,963 @@
1/*
2 * "splice": joining two ropes together by interweaving their strands.
3 *
4 * This is the "extended pipe" functionality, where a pipe is used as
5 * an arbitrary in-memory buffer. Think of a pipe as a small kernel
6 * buffer that you can use to transfer data from one end to the other.
7 *
8 * The traditional unix read/write is extended with a "splice()" operation
9 * that transfers data buffers to or from a pipe buffer.
10 *
11 * Named by Larry McVoy, original implementation from Linus, extended by
12 * Jens to support splicing to files, network, direct splicing, etc and
13 * fixing lots of bugs.
14 *
15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
18 *
19 */
20#include <linux/fs.h>
21#include <linux/file.h>
22#include <linux/pagemap.h>
23#include <linux/pipe_fs_i.h>
24#include <linux/mm_inline.h>
25#include <linux/swap.h>
26#include <linux/writeback.h>
27#include <linux/buffer_head.h>
28#include <linux/module.h>
29#include <linux/syscalls.h>
30
31/*
32 * Passed to the actors
33 */
34struct splice_desc {
35 unsigned int len, total_len; /* current and remaining length */
36 unsigned int flags; /* splice flags */
37 struct file *file; /* file to read/write */
38 loff_t pos; /* file position */
39};
40
41/*
42 * Attempt to steal a page from a pipe buffer. This should perhaps go into
43 * a vm helper function, it's already simplified quite a bit by the
44 * addition of remove_mapping(). If success is returned, the caller may
45 * attempt to reuse this page for another destination.
46 */
47static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
48 struct pipe_buffer *buf)
49{
50 struct page *page = buf->page;
51 struct address_space *mapping = page_mapping(page);
52
53 WARN_ON(!PageLocked(page));
54 WARN_ON(!PageUptodate(page));
55
56 /*
57 * At least for ext2 with nobh option, we need to wait on writeback
58 * completing on this page, since we'll remove it from the pagecache.
59 * Otherwise truncate wont wait on the page, allowing the disk
60 * blocks to be reused by someone else before we actually wrote our
61 * data to them. fs corruption ensues.
62 */
63 wait_on_page_writeback(page);
64
65 if (PagePrivate(page))
66 try_to_release_page(page, mapping_gfp_mask(mapping));
67
68 if (!remove_mapping(mapping, page))
69 return 1;
70
71 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
72 return 0;
73}
74
75static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
76 struct pipe_buffer *buf)
77{
78 page_cache_release(buf->page);
79 buf->page = NULL;
80 buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
81}
82
83static void *page_cache_pipe_buf_map(struct file *file,
84 struct pipe_inode_info *info,
85 struct pipe_buffer *buf)
86{
87 struct page *page = buf->page;
88 int err;
89
90 if (!PageUptodate(page)) {
91 lock_page(page);
92
93 /*
94 * Page got truncated/unhashed. This will cause a 0-byte
95 * splice, if this is the first page.
96 */
97 if (!page->mapping) {
98 err = -ENODATA;
99 goto error;
100 }
101
102 /*
103 * Uh oh, read-error from disk.
104 */
105 if (!PageUptodate(page)) {
106 err = -EIO;
107 goto error;
108 }
109
110 /*
111 * Page is ok afterall, fall through to mapping.
112 */
113 unlock_page(page);
114 }
115
116 return kmap(page);
117error:
118 unlock_page(page);
119 return ERR_PTR(err);
120}
121
122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
123 struct pipe_buffer *buf)
124{
125 kunmap(buf->page);
126}
127
128static struct pipe_buf_operations page_cache_pipe_buf_ops = {
129 .can_merge = 0,
130 .map = page_cache_pipe_buf_map,
131 .unmap = page_cache_pipe_buf_unmap,
132 .release = page_cache_pipe_buf_release,
133 .steal = page_cache_pipe_buf_steal,
134};
135
136/*
137 * Pipe output worker. This sets up our pipe format with the page cache
138 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
139 */
140static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
141 int nr_pages, unsigned long offset,
142 unsigned long len, unsigned int flags)
143{
144 int ret, do_wakeup, i;
145
146 ret = 0;
147 do_wakeup = 0;
148 i = 0;
149
150 if (pipe->inode)
151 mutex_lock(&pipe->inode->i_mutex);
152
153 for (;;) {
154 if (!pipe->readers) {
155 send_sig(SIGPIPE, current, 0);
156 if (!ret)
157 ret = -EPIPE;
158 break;
159 }
160
161 if (pipe->nrbufs < PIPE_BUFFERS) {
162 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
163 struct pipe_buffer *buf = pipe->bufs + newbuf;
164 struct page *page = pages[i++];
165 unsigned long this_len;
166
167 this_len = PAGE_CACHE_SIZE - offset;
168 if (this_len > len)
169 this_len = len;
170
171 buf->page = page;
172 buf->offset = offset;
173 buf->len = this_len;
174 buf->ops = &page_cache_pipe_buf_ops;
175 pipe->nrbufs++;
176 if (pipe->inode)
177 do_wakeup = 1;
178
179 ret += this_len;
180 len -= this_len;
181 offset = 0;
182 if (!--nr_pages)
183 break;
184 if (!len)
185 break;
186 if (pipe->nrbufs < PIPE_BUFFERS)
187 continue;
188
189 break;
190 }
191
192 if (flags & SPLICE_F_NONBLOCK) {
193 if (!ret)
194 ret = -EAGAIN;
195 break;
196 }
197
198 if (signal_pending(current)) {
199 if (!ret)
200 ret = -ERESTARTSYS;
201 break;
202 }
203
204 if (do_wakeup) {
205 smp_mb();
206 if (waitqueue_active(&pipe->wait))
207 wake_up_interruptible_sync(&pipe->wait);
208 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
209 do_wakeup = 0;
210 }
211
212 pipe->waiting_writers++;
213 pipe_wait(pipe);
214 pipe->waiting_writers--;
215 }
216
217 if (pipe->inode)
218 mutex_unlock(&pipe->inode->i_mutex);
219
220 if (do_wakeup) {
221 smp_mb();
222 if (waitqueue_active(&pipe->wait))
223 wake_up_interruptible(&pipe->wait);
224 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
225 }
226
227 while (i < nr_pages)
228 page_cache_release(pages[i++]);
229
230 return ret;
231}
232
233static int
234__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
235 size_t len, unsigned int flags)
236{
237 struct address_space *mapping = in->f_mapping;
238 unsigned int offset, nr_pages;
239 struct page *pages[PIPE_BUFFERS];
240 struct page *page;
241 pgoff_t index;
242 int i, error;
243
244 index = in->f_pos >> PAGE_CACHE_SHIFT;
245 offset = in->f_pos & ~PAGE_CACHE_MASK;
246 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
247
248 if (nr_pages > PIPE_BUFFERS)
249 nr_pages = PIPE_BUFFERS;
250
251 /*
252 * Initiate read-ahead on this page range. however, don't call into
253 * read-ahead if this is a non-zero offset (we are likely doing small
254 * chunk splice and the page is already there) for a single page.
255 */
256 if (!offset || nr_pages > 1)
257 do_page_cache_readahead(mapping, in, index, nr_pages);
258
259 /*
260 * Now fill in the holes:
261 */
262 error = 0;
263 for (i = 0; i < nr_pages; i++, index++) {
264find_page:
265 /*
266 * lookup the page for this index
267 */
268 page = find_get_page(mapping, index);
269 if (!page) {
270 /*
271 * If in nonblock mode then dont block on
272 * readpage (we've kicked readahead so there
273 * will be asynchronous progress):
274 */
275 if (flags & SPLICE_F_NONBLOCK)
276 break;
277
278 /*
279 * page didn't exist, allocate one
280 */
281 page = page_cache_alloc_cold(mapping);
282 if (!page)
283 break;
284
285 error = add_to_page_cache_lru(page, mapping, index,
286 mapping_gfp_mask(mapping));
287 if (unlikely(error)) {
288 page_cache_release(page);
289 break;
290 }
291
292 goto readpage;
293 }
294
295 /*
296 * If the page isn't uptodate, we may need to start io on it
297 */
298 if (!PageUptodate(page)) {
299 lock_page(page);
300
301 /*
302 * page was truncated, stop here. if this isn't the
303 * first page, we'll just complete what we already
304 * added
305 */
306 if (!page->mapping) {
307 unlock_page(page);
308 page_cache_release(page);
309 break;
310 }
311 /*
312 * page was already under io and is now done, great
313 */
314 if (PageUptodate(page)) {
315 unlock_page(page);
316 goto fill_it;
317 }
318
319readpage:
320 /*
321 * need to read in the page
322 */
323 error = mapping->a_ops->readpage(in, page);
324
325 if (unlikely(error)) {
326 page_cache_release(page);
327 if (error == AOP_TRUNCATED_PAGE)
328 goto find_page;
329 break;
330 }
331 }
332fill_it:
333 pages[i] = page;
334 }
335
336 if (i)
337 return move_to_pipe(pipe, pages, i, offset, len, flags);
338
339 return error;
340}
341
342/**
343 * generic_file_splice_read - splice data from file to a pipe
344 * @in: file to splice from
345 * @pipe: pipe to splice to
346 * @len: number of bytes to splice
347 * @flags: splice modifier flags
348 *
349 * Will read pages from given file and fill them into a pipe.
350 */
351ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
352 size_t len, unsigned int flags)
353{
354 ssize_t spliced;
355 int ret;
356
357 ret = 0;
358 spliced = 0;
359
360 while (len) {
361 ret = __generic_file_splice_read(in, pipe, len, flags);
362
363 if (ret <= 0)
364 break;
365
366 in->f_pos += ret;
367 len -= ret;
368 spliced += ret;
369
370 if (!(flags & SPLICE_F_NONBLOCK))
371 continue;
372 ret = -EAGAIN;
373 break;
374 }
375
376 if (spliced)
377 return spliced;
378
379 return ret;
380}
381
382EXPORT_SYMBOL(generic_file_splice_read);
383
384/*
385 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
386 * using sendpage().
387 */
388static int pipe_to_sendpage(struct pipe_inode_info *info,
389 struct pipe_buffer *buf, struct splice_desc *sd)
390{
391 struct file *file = sd->file;
392 loff_t pos = sd->pos;
393 unsigned int offset;
394 ssize_t ret;
395 void *ptr;
396 int more;
397
398 /*
399 * Sub-optimal, but we are limited by the pipe ->map. We don't
400 * need a kmap'ed buffer here, we just want to make sure we
401 * have the page pinned if the pipe page originates from the
402 * page cache.
403 */
404 ptr = buf->ops->map(file, info, buf);
405 if (IS_ERR(ptr))
406 return PTR_ERR(ptr);
407
408 offset = pos & ~PAGE_CACHE_MASK;
409 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
410
411 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more);
412
413 buf->ops->unmap(info, buf);
414 if (ret == sd->len)
415 return 0;
416
417 return -EIO;
418}
419
420/*
421 * This is a little more tricky than the file -> pipe splicing. There are
422 * basically three cases:
423 *
424 * - Destination page already exists in the address space and there
425 * are users of it. For that case we have no other option that
426 * copying the data. Tough luck.
427 * - Destination page already exists in the address space, but there
428 * are no users of it. Make sure it's uptodate, then drop it. Fall
429 * through to last case.
430 * - Destination page does not exist, we can add the pipe page to
431 * the page cache and avoid the copy.
432 *
433 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
434 * sd->flags), we attempt to migrate pages from the pipe to the output
435 * file address space page cache. This is possible if no one else has
436 * the pipe page referenced outside of the pipe and page cache. If
437 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
438 * a new page in the output file page cache and fill/dirty that.
439 */
440static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
441 struct splice_desc *sd)
442{
443 struct file *file = sd->file;
444 struct address_space *mapping = file->f_mapping;
445 gfp_t gfp_mask = mapping_gfp_mask(mapping);
446 unsigned int offset;
447 struct page *page;
448 pgoff_t index;
449 char *src;
450 int ret;
451
452 /*
453 * make sure the data in this buffer is uptodate
454 */
455 src = buf->ops->map(file, info, buf);
456 if (IS_ERR(src))
457 return PTR_ERR(src);
458
459 index = sd->pos >> PAGE_CACHE_SHIFT;
460 offset = sd->pos & ~PAGE_CACHE_MASK;
461
462 /*
463 * Reuse buf page, if SPLICE_F_MOVE is set.
464 */
465 if (sd->flags & SPLICE_F_MOVE) {
466 /*
467 * If steal succeeds, buf->page is now pruned from the vm
468 * side (LRU and page cache) and we can reuse it.
469 */
470 if (buf->ops->steal(info, buf))
471 goto find_page;
472
473 /*
474 * this will also set the page locked
475 */
476 page = buf->page;
477 if (add_to_page_cache(page, mapping, index, gfp_mask))
478 goto find_page;
479
480 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
481 lru_cache_add(page);
482 } else {
483find_page:
484 ret = -ENOMEM;
485 page = find_or_create_page(mapping, index, gfp_mask);
486 if (!page)
487 goto out_nomem;
488
489 /*
490 * If the page is uptodate, it is also locked. If it isn't
491 * uptodate, we can mark it uptodate if we are filling the
492 * full page. Otherwise we need to read it in first...
493 */
494 if (!PageUptodate(page)) {
495 if (sd->len < PAGE_CACHE_SIZE) {
496 ret = mapping->a_ops->readpage(file, page);
497 if (unlikely(ret))
498 goto out;
499
500 lock_page(page);
501
502 if (!PageUptodate(page)) {
503 /*
504 * Page got invalidated, repeat.
505 */
506 if (!page->mapping) {
507 unlock_page(page);
508 page_cache_release(page);
509 goto find_page;
510 }
511 ret = -EIO;
512 goto out;
513 }
514 } else {
515 WARN_ON(!PageLocked(page));
516 SetPageUptodate(page);
517 }
518 }
519 }
520
521 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len);
522 if (ret == AOP_TRUNCATED_PAGE) {
523 page_cache_release(page);
524 goto find_page;
525 } else if (ret)
526 goto out;
527
528 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
529 char *dst = kmap_atomic(page, KM_USER0);
530
531 memcpy(dst + offset, src + buf->offset, sd->len);
532 flush_dcache_page(page);
533 kunmap_atomic(dst, KM_USER0);
534 }
535
536 ret = mapping->a_ops->commit_write(file, page, 0, sd->len);
537 if (ret == AOP_TRUNCATED_PAGE) {
538 page_cache_release(page);
539 goto find_page;
540 } else if (ret)
541 goto out;
542
543 mark_page_accessed(page);
544 balance_dirty_pages_ratelimited(mapping);
545out:
546 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
547 page_cache_release(page);
548 unlock_page(page);
549 }
550out_nomem:
551 buf->ops->unmap(info, buf);
552 return ret;
553}
554
/*
 * Worker invoked by move_from_pipe() for each pipe buffer; returns 0 on
 * success or a negative error.
 */
typedef int (splice_actor)(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf,
			   struct splice_desc *sd);
557
558/*
559 * Pipe input worker. Most of this logic works like a regular pipe, the
560 * key here is the 'actor' worker passed in that actually moves the data
561 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
562 */
563static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
564 size_t len, unsigned int flags,
565 splice_actor *actor)
566{
567 int ret, do_wakeup, err;
568 struct splice_desc sd;
569
570 ret = 0;
571 do_wakeup = 0;
572
573 sd.total_len = len;
574 sd.flags = flags;
575 sd.file = out;
576 sd.pos = out->f_pos;
577
578 if (pipe->inode)
579 mutex_lock(&pipe->inode->i_mutex);
580
581 for (;;) {
582 if (pipe->nrbufs) {
583 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
584 struct pipe_buf_operations *ops = buf->ops;
585
586 sd.len = buf->len;
587 if (sd.len > sd.total_len)
588 sd.len = sd.total_len;
589
590 err = actor(pipe, buf, &sd);
591 if (err) {
592 if (!ret && err != -ENODATA)
593 ret = err;
594
595 break;
596 }
597
598 ret += sd.len;
599 buf->offset += sd.len;
600 buf->len -= sd.len;
601
602 if (!buf->len) {
603 buf->ops = NULL;
604 ops->release(pipe, buf);
605 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
606 pipe->nrbufs--;
607 if (pipe->inode)
608 do_wakeup = 1;
609 }
610
611 sd.pos += sd.len;
612 sd.total_len -= sd.len;
613 if (!sd.total_len)
614 break;
615 }
616
617 if (pipe->nrbufs)
618 continue;
619 if (!pipe->writers)
620 break;
621 if (!pipe->waiting_writers) {
622 if (ret)
623 break;
624 }
625
626 if (flags & SPLICE_F_NONBLOCK) {
627 if (!ret)
628 ret = -EAGAIN;
629 break;
630 }
631
632 if (signal_pending(current)) {
633 if (!ret)
634 ret = -ERESTARTSYS;
635 break;
636 }
637
638 if (do_wakeup) {
639 smp_mb();
640 if (waitqueue_active(&pipe->wait))
641 wake_up_interruptible_sync(&pipe->wait);
642 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
643 do_wakeup = 0;
644 }
645
646 pipe_wait(pipe);
647 }
648
649 if (pipe->inode)
650 mutex_unlock(&pipe->inode->i_mutex);
651
652 if (do_wakeup) {
653 smp_mb();
654 if (waitqueue_active(&pipe->wait))
655 wake_up_interruptible(&pipe->wait);
656 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
657 }
658
659 out->f_pos = sd.pos;
660 return ret;
661
662}
663
664/**
665 * generic_file_splice_write - splice data from a pipe to a file
666 * @pipe: pipe info
667 * @out: file to write to
668 * @len: number of bytes to splice
669 * @flags: splice modifier flags
670 *
671 * Will either move or copy pages (determined by @flags options) from
672 * the given pipe inode to the given file.
673 *
674 */
675ssize_t
676generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
677 size_t len, unsigned int flags)
678{
679 struct address_space *mapping = out->f_mapping;
680 ssize_t ret;
681
682 ret = move_from_pipe(pipe, out, len, flags, pipe_to_file);
683
684 /*
685 * If file or inode is SYNC and we actually wrote some data, sync it.
686 */
687 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
688 && ret > 0) {
689 struct inode *inode = mapping->host;
690 int err;
691
692 mutex_lock(&inode->i_mutex);
693 err = generic_osync_inode(mapping->host, mapping,
694 OSYNC_METADATA|OSYNC_DATA);
695 mutex_unlock(&inode->i_mutex);
696
697 if (err)
698 ret = err;
699 }
700
701 return ret;
702}
703
704EXPORT_SYMBOL(generic_file_splice_write);
705
706/**
707 * generic_splice_sendpage - splice data from a pipe to a socket
708 * @inode: pipe inode
709 * @out: socket to write to
710 * @len: number of bytes to splice
711 * @flags: splice modifier flags
712 *
713 * Will send @len bytes from the pipe to a network socket. No data copying
714 * is involved.
715 *
716 */
717ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
718 size_t len, unsigned int flags)
719{
720 return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage);
721}
722
723EXPORT_SYMBOL(generic_splice_sendpage);
724
725/*
726 * Attempt to initiate a splice from pipe to file.
727 */
728static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
729 size_t len, unsigned int flags)
730{
731 loff_t pos;
732 int ret;
733
734 if (unlikely(!out->f_op || !out->f_op->splice_write))
735 return -EINVAL;
736
737 if (unlikely(!(out->f_mode & FMODE_WRITE)))
738 return -EBADF;
739
740 pos = out->f_pos;
741
742 ret = rw_verify_area(WRITE, out, &pos, len);
743 if (unlikely(ret < 0))
744 return ret;
745
746 return out->f_op->splice_write(pipe, out, len, flags);
747}
748
749/*
750 * Attempt to initiate a splice from a file to a pipe.
751 */
752static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
753 size_t len, unsigned int flags)
754{
755 loff_t pos, isize, left;
756 int ret;
757
758 if (unlikely(!in->f_op || !in->f_op->splice_read))
759 return -EINVAL;
760
761 if (unlikely(!(in->f_mode & FMODE_READ)))
762 return -EBADF;
763
764 pos = in->f_pos;
765
766 ret = rw_verify_area(READ, in, &pos, len);
767 if (unlikely(ret < 0))
768 return ret;
769
770 isize = i_size_read(in->f_mapping->host);
771 if (unlikely(in->f_pos >= isize))
772 return 0;
773
774 left = isize - in->f_pos;
775 if (unlikely(left < len))
776 len = left;
777
778 return in->f_op->splice_read(in, pipe, len, flags);
779}
780
781long do_splice_direct(struct file *in, struct file *out, size_t len,
782 unsigned int flags)
783{
784 struct pipe_inode_info *pipe;
785 long ret, bytes;
786 umode_t i_mode;
787 int i;
788
789 /*
790 * We require the input being a regular file, as we don't want to
791 * randomly drop data for eg socket -> socket splicing. Use the
792 * piped splicing for that!
793 */
794 i_mode = in->f_dentry->d_inode->i_mode;
795 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
796 return -EINVAL;
797
798 /*
799 * neither in nor out is a pipe, setup an internal pipe attached to
800 * 'out' and transfer the wanted data from 'in' to 'out' through that
801 */
802 pipe = current->splice_pipe;
803 if (unlikely(!pipe)) {
804 pipe = alloc_pipe_info(NULL);
805 if (!pipe)
806 return -ENOMEM;
807
808 /*
809 * We don't have an immediate reader, but we'll read the stuff
810 * out of the pipe right after the move_to_pipe(). So set
811 * PIPE_READERS appropriately.
812 */
813 pipe->readers = 1;
814
815 current->splice_pipe = pipe;
816 }
817
818 /*
819 * Do the splice.
820 */
821 ret = 0;
822 bytes = 0;
823
824 while (len) {
825 size_t read_len, max_read_len;
826
827 /*
828 * Do at most PIPE_BUFFERS pages worth of transfer:
829 */
830 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
831
832 ret = do_splice_to(in, pipe, max_read_len, flags);
833 if (unlikely(ret < 0))
834 goto out_release;
835
836 read_len = ret;
837
838 /*
839 * NOTE: nonblocking mode only applies to the input. We
840 * must not do the output in nonblocking mode as then we
841 * could get stuck data in the internal pipe:
842 */
843 ret = do_splice_from(pipe, out, read_len,
844 flags & ~SPLICE_F_NONBLOCK);
845 if (unlikely(ret < 0))
846 goto out_release;
847
848 bytes += ret;
849 len -= ret;
850
851 /*
852 * In nonblocking mode, if we got back a short read then
853 * that was due to either an IO error or due to the
854 * pagecache entry not being there. In the IO error case
855 * the _next_ splice attempt will produce a clean IO error
856 * return value (not a short read), so in both cases it's
857 * correct to break out of the loop here:
858 */
859 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
860 break;
861 }
862
863 pipe->nrbufs = pipe->curbuf = 0;
864
865 return bytes;
866
867out_release:
868 /*
869 * If we did an incomplete transfer we must release
870 * the pipe buffers in question:
871 */
872 for (i = 0; i < PIPE_BUFFERS; i++) {
873 struct pipe_buffer *buf = pipe->bufs + i;
874
875 if (buf->ops) {
876 buf->ops->release(pipe, buf);
877 buf->ops = NULL;
878 }
879 }
880 pipe->nrbufs = pipe->curbuf = 0;
881
882 /*
883 * If we transferred some data, return the number of bytes:
884 */
885 if (bytes > 0)
886 return bytes;
887
888 return ret;
889}
890
891EXPORT_SYMBOL(do_splice_direct);
892
893/*
894 * Determine where to splice to/from.
895 */
896static long do_splice(struct file *in, loff_t __user *off_in,
897 struct file *out, loff_t __user *off_out,
898 size_t len, unsigned int flags)
899{
900 struct pipe_inode_info *pipe;
901
902 pipe = in->f_dentry->d_inode->i_pipe;
903 if (pipe) {
904 if (off_in)
905 return -ESPIPE;
906 if (off_out) {
907 if (out->f_op->llseek == no_llseek)
908 return -EINVAL;
909 if (copy_from_user(&out->f_pos, off_out,
910 sizeof(loff_t)))
911 return -EFAULT;
912 }
913
914 return do_splice_from(pipe, out, len, flags);
915 }
916
917 pipe = out->f_dentry->d_inode->i_pipe;
918 if (pipe) {
919 if (off_out)
920 return -ESPIPE;
921 if (off_in) {
922 if (in->f_op->llseek == no_llseek)
923 return -EINVAL;
924 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
925 return -EFAULT;
926 }
927
928 return do_splice_to(in, pipe, len, flags);
929 }
930
931 return -EINVAL;
932}
933
934asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
935 int fd_out, loff_t __user *off_out,
936 size_t len, unsigned int flags)
937{
938 long error;
939 struct file *in, *out;
940 int fput_in, fput_out;
941
942 if (unlikely(!len))
943 return 0;
944
945 error = -EBADF;
946 in = fget_light(fd_in, &fput_in);
947 if (in) {
948 if (in->f_mode & FMODE_READ) {
949 out = fget_light(fd_out, &fput_out);
950 if (out) {
951 if (out->f_mode & FMODE_WRITE)
952 error = do_splice(in, off_in,
953 out, off_out,
954 len, flags);
955 fput_light(out, fput_out);
956 }
957 }
958
959 fput_light(in, fput_in);
960 }
961
962 return error;
963}
diff --git a/fs/sync.c b/fs/sync.c
new file mode 100644
index 000000000000..aab5ffe77e9f
--- /dev/null
+++ b/fs/sync.c
@@ -0,0 +1,164 @@
1/*
2 * High-level sync()-related operations
3 */
4
5#include <linux/kernel.h>
6#include <linux/file.h>
7#include <linux/fs.h>
8#include <linux/module.h>
9#include <linux/writeback.h>
10#include <linux/syscalls.h>
11#include <linux/linkage.h>
12#include <linux/pagemap.h>
13
/* every flag bit sys_sync_file_range() accepts; anything else is -EINVAL */
#define VALID_FLAGS	(SYNC_FILE_RANGE_WAIT_BEFORE | \
			 SYNC_FILE_RANGE_WRITE | \
			 SYNC_FILE_RANGE_WAIT_AFTER)
16
17/*
18 * sys_sync_file_range() permits finely controlled syncing over a segment of
19 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
20 * zero then sys_sync_file_range() will operate from offset out to EOF.
21 *
22 * The flag bits are:
23 *
24 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
25 * before performing the write.
26 *
27 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
28 * range which are not presently under writeback.
29 *
30 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
31 * after performing the write.
32 *
33 * Useful combinations of the flag bits are:
34 *
35 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
36 * in the range which were dirty on entry to sys_sync_file_range() are placed
37 * under writeout. This is a start-write-for-data-integrity operation.
38 *
39 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
40 * are not presently under writeout. This is an asynchronous flush-to-disk
41 * operation. Not suitable for data integrity operations.
42 *
43 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
44 * completion of writeout of all pages in the range. This will be used after an
45 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
46 * for that operation to complete and to return the result.
47 *
48 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
49 * a traditional sync() operation. This is a write-for-data-integrity operation
50 * which will ensure that all pages in the range which were dirty on entry to
51 * sys_sync_file_range() are committed to disk.
52 *
53 *
54 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
55 * I/O errors or ENOSPC conditions and will return those to the caller, after
56 * clearing the EIO and ENOSPC flags in the address_space.
57 *
58 * It should be noted that none of these operations write out the file's
59 * metadata. So unless the application is strictly performing overwrites of
60 * already-instantiated disk blocks, there are no guarantees here that the data
61 * will be available after a crash.
62 */
63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
64 unsigned int flags)
65{
66 int ret;
67 struct file *file;
68 loff_t endbyte; /* inclusive */
69 int fput_needed;
70 umode_t i_mode;
71
72 ret = -EINVAL;
73 if (flags & ~VALID_FLAGS)
74 goto out;
75
76 endbyte = offset + nbytes;
77
78 if ((s64)offset < 0)
79 goto out;
80 if ((s64)endbyte < 0)
81 goto out;
82 if (endbyte < offset)
83 goto out;
84
85 if (sizeof(pgoff_t) == 4) {
86 if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
87 /*
88 * The range starts outside a 32 bit machine's
89 * pagecache addressing capabilities. Let it "succeed"
90 */
91 ret = 0;
92 goto out;
93 }
94 if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
95 /*
96 * Out to EOF
97 */
98 nbytes = 0;
99 }
100 }
101
102 if (nbytes == 0)
103 endbyte = -1;
104 else
105 endbyte--; /* inclusive */
106
107 ret = -EBADF;
108 file = fget_light(fd, &fput_needed);
109 if (!file)
110 goto out;
111
112 i_mode = file->f_dentry->d_inode->i_mode;
113 ret = -ESPIPE;
114 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
115 !S_ISLNK(i_mode))
116 goto out_put;
117
118 ret = do_sync_file_range(file, offset, endbyte, flags);
119out_put:
120 fput_light(file, fput_needed);
121out:
122 return ret;
123}
124
125/*
126 * `endbyte' is inclusive
127 */
128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
129 unsigned int flags)
130{
131 int ret;
132 struct address_space *mapping;
133
134 mapping = file->f_mapping;
135 if (!mapping) {
136 ret = -EINVAL;
137 goto out;
138 }
139
140 ret = 0;
141 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
142 ret = wait_on_page_writeback_range(mapping,
143 offset >> PAGE_CACHE_SHIFT,
144 endbyte >> PAGE_CACHE_SHIFT);
145 if (ret < 0)
146 goto out;
147 }
148
149 if (flags & SYNC_FILE_RANGE_WRITE) {
150 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
151 WB_SYNC_NONE);
152 if (ret < 0)
153 goto out;
154 }
155
156 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
157 ret = wait_on_page_writeback_range(mapping,
158 offset >> PAGE_CACHE_SHIFT,
159 endbyte >> PAGE_CACHE_SHIFT);
160 }
161out:
162 return ret;
163}
164EXPORT_SYMBOL_GPL(do_sync_file_range);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f26880a4785e..6cfdc9a87772 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -50,7 +50,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
50 return sd; 50 return sd;
51} 51}
52 52
53/** 53/*
54 * 54 *
55 * Return -EEXIST if there is already a sysfs element with the same name for 55 * Return -EEXIST if there is already a sysfs element with the same name for
56 * the same parent. 56 * the same parent.
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 830f76fa098c..f1cb1ddde511 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -183,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
183 return -ENOMEM; 183 return -ENOMEM;
184 184
185 if (count >= PAGE_SIZE) 185 if (count >= PAGE_SIZE)
186 count = PAGE_SIZE; 186 count = PAGE_SIZE - 1;
187 error = copy_from_user(buffer->page,buf,count); 187 error = copy_from_user(buffer->page,buf,count);
188 buffer->needs_read_fill = 1; 188 buffer->needs_read_fill = 1;
189 return error ? -EFAULT : count; 189 return error ? -EFAULT : count;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 4c29ac41ac3e..f0b347bd12ca 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -175,8 +175,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
175 struct bin_attribute * bin_attr; 175 struct bin_attribute * bin_attr;
176 struct sysfs_symlink * sl; 176 struct sysfs_symlink * sl;
177 177
178 if (!sd || !sd->s_element) 178 BUG_ON(!sd || !sd->s_element);
179 BUG();
180 179
181 switch (sd->s_type) { 180 switch (sd->s_type) {
182 case SYSFS_DIR: 181 case SYSFS_DIR:
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 8c66e9270dd6..d7074341ee87 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -253,8 +253,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
253 253
254 lock_page(page); 254 lock_page(page);
255 err = mapping->a_ops->prepare_write(NULL, page, from, to); 255 err = mapping->a_ops->prepare_write(NULL, page, from, to);
256 if (err) 256 BUG_ON(err);
257 BUG();
258 de->inode = 0; 257 de->inode = 0;
259 err = dir_commit_chunk(page, from, to); 258 err = dir_commit_chunk(page, from, to);
260 dir_put_page(page); 259 dir_put_page(page);
@@ -353,8 +352,7 @@ void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
353 352
354 lock_page(page); 353 lock_page(page);
355 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 354 err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
356 if (err) 355 BUG_ON(err);
357 BUG();
358 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); 356 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
359 err = dir_commit_chunk(page, from, to); 357 err = dir_commit_chunk(page, from, to);
360 dir_put_page(page); 358 dir_put_page(page);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 81e0e8459af1..2983afd5e7fd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -312,12 +312,10 @@ static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head
312 err = 0; 312 err = 0;
313 313
314 bh = inode_getblk(inode, block, &err, &phys, &new); 314 bh = inode_getblk(inode, block, &err, &phys, &new);
315 if (bh) 315 BUG_ON(bh);
316 BUG();
317 if (err) 316 if (err)
318 goto abort; 317 goto abort;
319 if (!phys) 318 BUG_ON(!phys);
320 BUG();
321 319
322 if (new) 320 if (new)
323 set_buffer_new(bh_result); 321 set_buffer_new(bh_result);
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index ef46939c0c1a..a56cec3be5f0 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -185,24 +185,6 @@ static int vfat_valid_longname(const unsigned char *name, unsigned int len)
185 return -EINVAL; 185 return -EINVAL;
186 if (len >= 256) 186 if (len >= 256)
187 return -ENAMETOOLONG; 187 return -ENAMETOOLONG;
188
189 /* MS-DOS "device special files" */
190 if (len == 3 || (len > 3 && name[3] == '.')) { /* basename == 3 */
191 if (!strnicmp(name, "aux", 3) ||
192 !strnicmp(name, "con", 3) ||
193 !strnicmp(name, "nul", 3) ||
194 !strnicmp(name, "prn", 3))
195 return -EINVAL;
196 }
197 if (len == 4 || (len > 4 && name[4] == '.')) { /* basename == 4 */
198 /* "com1", "com2", ... */
199 if ('1' <= name[3] && name[3] <= '9') {
200 if (!strnicmp(name, "com", 3) ||
201 !strnicmp(name, "lpt", 3))
202 return -EINVAL;
203 }
204 }
205
206 return 0; 188 return 0;
207} 189}
208 190
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6cbbd165c60d..4d191ef39b67 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -870,12 +870,14 @@ xfs_page_state_convert(
870 pgoff_t end_index, last_index, tlast; 870 pgoff_t end_index, last_index, tlast;
871 ssize_t size, len; 871 ssize_t size, len;
872 int flags, err, iomap_valid = 0, uptodate = 1; 872 int flags, err, iomap_valid = 0, uptodate = 1;
873 int page_dirty, count = 0, trylock_flag = 0; 873 int page_dirty, count = 0;
874 int trylock = 0;
874 int all_bh = unmapped; 875 int all_bh = unmapped;
875 876
876 /* wait for other IO threads? */ 877 if (startio) {
877 if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)) 878 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
878 trylock_flag |= BMAPI_TRYLOCK; 879 trylock |= BMAPI_TRYLOCK;
880 }
879 881
880 /* Is this page beyond the end of the file? */ 882 /* Is this page beyond the end of the file? */
881 offset = i_size_read(inode); 883 offset = i_size_read(inode);
@@ -956,15 +958,13 @@ xfs_page_state_convert(
956 958
957 if (buffer_unwritten(bh)) { 959 if (buffer_unwritten(bh)) {
958 type = IOMAP_UNWRITTEN; 960 type = IOMAP_UNWRITTEN;
959 flags = BMAPI_WRITE|BMAPI_IGNSTATE; 961 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
960 } else if (buffer_delay(bh)) { 962 } else if (buffer_delay(bh)) {
961 type = IOMAP_DELAY; 963 type = IOMAP_DELAY;
962 flags = BMAPI_ALLOCATE; 964 flags = BMAPI_ALLOCATE | trylock;
963 if (!startio)
964 flags |= trylock_flag;
965 } else { 965 } else {
966 type = IOMAP_NEW; 966 type = IOMAP_NEW;
967 flags = BMAPI_WRITE|BMAPI_MMAP; 967 flags = BMAPI_WRITE | BMAPI_MMAP;
968 } 968 }
969 969
970 if (!iomap_valid) { 970 if (!iomap_valid) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9fb0312665ca..26fed0756f01 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -182,7 +182,7 @@ free_address(
182{ 182{
183 a_list_t *aentry; 183 a_list_t *aentry;
184 184
185 aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH); 185 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
186 if (likely(aentry)) { 186 if (likely(aentry)) {
187 spin_lock(&as_lock); 187 spin_lock(&as_lock);
188 aentry->next = as_free_head; 188 aentry->next = as_free_head;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 85997b1205f5..269721af02f3 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -69,7 +69,6 @@ __xfs_file_read(
69 return rval; 69 return rval;
70} 70}
71 71
72
73STATIC ssize_t 72STATIC ssize_t
74xfs_file_aio_read( 73xfs_file_aio_read(
75 struct kiocb *iocb, 74 struct kiocb *iocb,
@@ -90,7 +89,6 @@ xfs_file_aio_read_invis(
90 return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); 89 return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
91} 90}
92 91
93
94STATIC inline ssize_t 92STATIC inline ssize_t
95__xfs_file_write( 93__xfs_file_write(
96 struct kiocb *iocb, 94 struct kiocb *iocb,
@@ -113,7 +111,6 @@ __xfs_file_write(
113 return rval; 111 return rval;
114} 112}
115 113
116
117STATIC ssize_t 114STATIC ssize_t
118xfs_file_aio_write( 115xfs_file_aio_write(
119 struct kiocb *iocb, 116 struct kiocb *iocb,
@@ -134,7 +131,6 @@ xfs_file_aio_write_invis(
134 return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); 131 return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
135} 132}
136 133
137
138STATIC inline ssize_t 134STATIC inline ssize_t
139__xfs_file_readv( 135__xfs_file_readv(
140 struct file *file, 136 struct file *file,
@@ -179,7 +175,6 @@ xfs_file_readv_invis(
179 return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos); 175 return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
180} 176}
181 177
182
183STATIC inline ssize_t 178STATIC inline ssize_t
184__xfs_file_writev( 179__xfs_file_writev(
185 struct file *file, 180 struct file *file,
@@ -204,7 +199,6 @@ __xfs_file_writev(
204 return rval; 199 return rval;
205} 200}
206 201
207
208STATIC ssize_t 202STATIC ssize_t
209xfs_file_writev( 203xfs_file_writev(
210 struct file *file, 204 struct file *file,
@@ -228,7 +222,7 @@ xfs_file_writev_invis(
228STATIC ssize_t 222STATIC ssize_t
229xfs_file_sendfile( 223xfs_file_sendfile(
230 struct file *filp, 224 struct file *filp,
231 loff_t *ppos, 225 loff_t *pos,
232 size_t count, 226 size_t count,
233 read_actor_t actor, 227 read_actor_t actor,
234 void *target) 228 void *target)
@@ -236,10 +230,80 @@ xfs_file_sendfile(
236 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); 230 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode);
237 ssize_t rval; 231 ssize_t rval;
238 232
239 VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval); 233 VOP_SENDFILE(vp, filp, pos, 0, count, actor, target, NULL, rval);
240 return rval; 234 return rval;
241} 235}
242 236
237STATIC ssize_t
238xfs_file_sendfile_invis(
239 struct file *filp,
240 loff_t *pos,
241 size_t count,
242 read_actor_t actor,
243 void *target)
244{
245 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode);
246 ssize_t rval;
247
248 VOP_SENDFILE(vp, filp, pos, IO_INVIS, count, actor, target, NULL, rval);
249 return rval;
250}
251
252STATIC ssize_t
253xfs_file_splice_read(
254 struct file *infilp,
255 struct pipe_inode_info *pipe,
256 size_t len,
257 unsigned int flags)
258{
259 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode);
260 ssize_t rval;
261
262 VOP_SPLICE_READ(vp, infilp, pipe, len, flags, 0, NULL, rval);
263 return rval;
264}
265
266STATIC ssize_t
267xfs_file_splice_read_invis(
268 struct file *infilp,
269 struct pipe_inode_info *pipe,
270 size_t len,
271 unsigned int flags)
272{
273 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode);
274 ssize_t rval;
275
276 VOP_SPLICE_READ(vp, infilp, pipe, len, flags, IO_INVIS, NULL, rval);
277 return rval;
278}
279
280STATIC ssize_t
281xfs_file_splice_write(
282 struct pipe_inode_info *pipe,
283 struct file *outfilp,
284 size_t len,
285 unsigned int flags)
286{
287 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode);
288 ssize_t rval;
289
290 VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, 0, NULL, rval);
291 return rval;
292}
293
294STATIC ssize_t
295xfs_file_splice_write_invis(
296 struct pipe_inode_info *pipe,
297 struct file *outfilp,
298 size_t len,
299 unsigned int flags)
300{
301 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode);
302 ssize_t rval;
303
304 VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, IO_INVIS, NULL, rval);
305 return rval;
306}
243 307
244STATIC int 308STATIC int
245xfs_file_open( 309xfs_file_open(
@@ -251,13 +315,10 @@ xfs_file_open(
251 315
252 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 316 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
253 return -EFBIG; 317 return -EFBIG;
254
255 ASSERT(vp);
256 VOP_OPEN(vp, NULL, error); 318 VOP_OPEN(vp, NULL, error);
257 return -error; 319 return -error;
258} 320}
259 321
260
261STATIC int 322STATIC int
262xfs_file_release( 323xfs_file_release(
263 struct inode *inode, 324 struct inode *inode,
@@ -271,7 +332,6 @@ xfs_file_release(
271 return -error; 332 return -error;
272} 333}
273 334
274
275STATIC int 335STATIC int
276xfs_file_fsync( 336xfs_file_fsync(
277 struct file *filp, 337 struct file *filp,
@@ -285,21 +345,11 @@ xfs_file_fsync(
285 345
286 if (datasync) 346 if (datasync)
287 flags |= FSYNC_DATA; 347 flags |= FSYNC_DATA;
288
289 ASSERT(vp);
290 VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); 348 VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
291 return -error; 349 return -error;
292} 350}
293 351
294/*
295 * xfs_file_readdir maps to VOP_READDIR().
296 * We need to build a uio, cred, ...
297 */
298
299#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
300
301#ifdef CONFIG_XFS_DMAPI 352#ifdef CONFIG_XFS_DMAPI
302
303STATIC struct page * 353STATIC struct page *
304xfs_vm_nopage( 354xfs_vm_nopage(
305 struct vm_area_struct *area, 355 struct vm_area_struct *area,
@@ -319,10 +369,8 @@ xfs_vm_nopage(
319 369
320 return filemap_nopage(area, address, type); 370 return filemap_nopage(area, address, type);
321} 371}
322
323#endif /* CONFIG_XFS_DMAPI */ 372#endif /* CONFIG_XFS_DMAPI */
324 373
325
326STATIC int 374STATIC int
327xfs_file_readdir( 375xfs_file_readdir(
328 struct file *filp, 376 struct file *filp,
@@ -330,7 +378,7 @@ xfs_file_readdir(
330 filldir_t filldir) 378 filldir_t filldir)
331{ 379{
332 int error = 0; 380 int error = 0;
333 vnode_t *vp; 381 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode);
334 uio_t uio; 382 uio_t uio;
335 iovec_t iov; 383 iovec_t iov;
336 int eof = 0; 384 int eof = 0;
@@ -340,9 +388,6 @@ xfs_file_readdir(
340 xfs_off_t start_offset, curr_offset; 388 xfs_off_t start_offset, curr_offset;
341 xfs_dirent_t *dbp = NULL; 389 xfs_dirent_t *dbp = NULL;
342 390
343 vp = vn_from_inode(filp->f_dentry->d_inode);
344 ASSERT(vp);
345
346 /* Try fairly hard to get memory */ 391 /* Try fairly hard to get memory */
347 do { 392 do {
348 if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL))) 393 if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
@@ -387,7 +432,7 @@ xfs_file_readdir(
387 } 432 }
388 size -= dbp->d_reclen; 433 size -= dbp->d_reclen;
389 curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */; 434 curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
390 dbp = nextdp(dbp); 435 dbp = (xfs_dirent_t *)((char *)dbp + dbp->d_reclen);
391 } 436 }
392 } 437 }
393done: 438done:
@@ -402,7 +447,6 @@ done:
402 return -error; 447 return -error;
403} 448}
404 449
405
406STATIC int 450STATIC int
407xfs_file_mmap( 451xfs_file_mmap(
408 struct file *filp, 452 struct file *filp,
@@ -457,11 +501,10 @@ xfs_file_ioctl_invis(
457 unsigned int cmd, 501 unsigned int cmd,
458 unsigned long arg) 502 unsigned long arg)
459{ 503{
460 int error;
461 struct inode *inode = filp->f_dentry->d_inode; 504 struct inode *inode = filp->f_dentry->d_inode;
462 vnode_t *vp = vn_from_inode(inode); 505 vnode_t *vp = vn_from_inode(inode);
506 int error;
463 507
464 ASSERT(vp);
465 VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); 508 VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
466 VMODIFY(vp); 509 VMODIFY(vp);
467 510
@@ -537,6 +580,8 @@ const struct file_operations xfs_file_operations = {
537 .aio_read = xfs_file_aio_read, 580 .aio_read = xfs_file_aio_read,
538 .aio_write = xfs_file_aio_write, 581 .aio_write = xfs_file_aio_write,
539 .sendfile = xfs_file_sendfile, 582 .sendfile = xfs_file_sendfile,
583 .splice_read = xfs_file_splice_read,
584 .splice_write = xfs_file_splice_write,
540 .unlocked_ioctl = xfs_file_ioctl, 585 .unlocked_ioctl = xfs_file_ioctl,
541#ifdef CONFIG_COMPAT 586#ifdef CONFIG_COMPAT
542 .compat_ioctl = xfs_file_compat_ioctl, 587 .compat_ioctl = xfs_file_compat_ioctl,
@@ -558,7 +603,9 @@ const struct file_operations xfs_invis_file_operations = {
558 .writev = xfs_file_writev_invis, 603 .writev = xfs_file_writev_invis,
559 .aio_read = xfs_file_aio_read_invis, 604 .aio_read = xfs_file_aio_read_invis,
560 .aio_write = xfs_file_aio_write_invis, 605 .aio_write = xfs_file_aio_write_invis,
561 .sendfile = xfs_file_sendfile, 606 .sendfile = xfs_file_sendfile_invis,
607 .splice_read = xfs_file_splice_read_invis,
608 .splice_write = xfs_file_splice_write_invis,
562 .unlocked_ioctl = xfs_file_ioctl_invis, 609 .unlocked_ioctl = xfs_file_ioctl_invis,
563#ifdef CONFIG_COMPAT 610#ifdef CONFIG_COMPAT
564 .compat_ioctl = xfs_file_compat_invis_ioctl, 611 .compat_ioctl = xfs_file_compat_invis_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 149237304fb6..2e2e275c786f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,8 +673,7 @@ xfs_vn_setattr(
673 if (ia_valid & ATTR_ATIME) { 673 if (ia_valid & ATTR_ATIME) {
674 vattr.va_mask |= XFS_AT_ATIME; 674 vattr.va_mask |= XFS_AT_ATIME;
675 vattr.va_atime = attr->ia_atime; 675 vattr.va_atime = attr->ia_atime;
676 if (ia_valid & ATTR_ATIME_SET) 676 inode->i_atime = attr->ia_atime;
677 inode->i_atime = attr->ia_atime;
678 } 677 }
679 if (ia_valid & ATTR_MTIME) { 678 if (ia_valid & ATTR_MTIME) {
680 vattr.va_mask |= XFS_AT_MTIME; 679 vattr.va_mask |= XFS_AT_MTIME;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 1fe09f2d6519..e9fe43d74768 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -103,6 +103,7 @@
103 */ 103 */
104#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ 104#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
105#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ 105#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
106#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
106#ifdef CONFIG_SMP 107#ifdef CONFIG_SMP
107#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 108#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
108#else 109#else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 84ddf1893894..74a52937f208 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -301,36 +301,23 @@ xfs_sendfile(
301 void *target, 301 void *target,
302 cred_t *credp) 302 cred_t *credp)
303{ 303{
304 xfs_inode_t *ip = XFS_BHVTOI(bdp);
305 xfs_mount_t *mp = ip->i_mount;
304 ssize_t ret; 306 ssize_t ret;
305 xfs_fsize_t n;
306 xfs_inode_t *ip;
307 xfs_mount_t *mp;
308 vnode_t *vp;
309
310 ip = XFS_BHVTOI(bdp);
311 vp = BHV_TO_VNODE(bdp);
312 mp = ip->i_mount;
313 307
314 XFS_STATS_INC(xs_read_calls); 308 XFS_STATS_INC(xs_read_calls);
315 309 if (XFS_FORCED_SHUTDOWN(mp))
316 n = XFS_MAXIOFFSET(mp) - *offset;
317 if ((n <= 0) || (count == 0))
318 return 0;
319
320 if (n < count)
321 count = n;
322
323 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
324 return -EIO; 310 return -EIO;
325 311
326 xfs_ilock(ip, XFS_IOLOCK_SHARED); 312 xfs_ilock(ip, XFS_IOLOCK_SHARED);
327 313
328 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && 314 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
329 (!(ioflags & IO_INVIS))) { 315 (!(ioflags & IO_INVIS))) {
330 vrwlock_t locktype = VRWLOCK_READ; 316 vrwlock_t locktype = VRWLOCK_READ;
331 int error; 317 int error;
332 318
333 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count, 319 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
320 *offset, count,
334 FILP_DELAY_FLAG(filp), &locktype); 321 FILP_DELAY_FLAG(filp), &locktype);
335 if (error) { 322 if (error) {
336 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 323 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -340,12 +327,96 @@ xfs_sendfile(
340 xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, 327 xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
341 (void *)(unsigned long)target, count, *offset, ioflags); 328 (void *)(unsigned long)target, count, *offset, ioflags);
342 ret = generic_file_sendfile(filp, offset, count, actor, target); 329 ret = generic_file_sendfile(filp, offset, count, actor, target);
330 if (ret > 0)
331 XFS_STATS_ADD(xs_read_bytes, ret);
343 332
344 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 333 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
334 return ret;
335}
345 336
337ssize_t
338xfs_splice_read(
339 bhv_desc_t *bdp,
340 struct file *infilp,
341 struct pipe_inode_info *pipe,
342 size_t count,
343 int flags,
344 int ioflags,
345 cred_t *credp)
346{
347 xfs_inode_t *ip = XFS_BHVTOI(bdp);
348 xfs_mount_t *mp = ip->i_mount;
349 ssize_t ret;
350
351 XFS_STATS_INC(xs_read_calls);
352 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
353 return -EIO;
354
355 xfs_ilock(ip, XFS_IOLOCK_SHARED);
356
357 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
358 (!(ioflags & IO_INVIS))) {
359 vrwlock_t locktype = VRWLOCK_READ;
360 int error;
361
362 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
363 infilp->f_pos, count,
364 FILP_DELAY_FLAG(infilp), &locktype);
365 if (error) {
366 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
367 return -error;
368 }
369 }
370 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore,
371 pipe, count, infilp->f_pos, ioflags);
372 ret = generic_file_splice_read(infilp, pipe, count, flags);
346 if (ret > 0) 373 if (ret > 0)
347 XFS_STATS_ADD(xs_read_bytes, ret); 374 XFS_STATS_ADD(xs_read_bytes, ret);
348 375
376 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
377 return ret;
378}
379
380ssize_t
381xfs_splice_write(
382 bhv_desc_t *bdp,
383 struct pipe_inode_info *pipe,
384 struct file *outfilp,
385 size_t count,
386 int flags,
387 int ioflags,
388 cred_t *credp)
389{
390 xfs_inode_t *ip = XFS_BHVTOI(bdp);
391 xfs_mount_t *mp = ip->i_mount;
392 ssize_t ret;
393
394 XFS_STATS_INC(xs_write_calls);
395 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
396 return -EIO;
397
398 xfs_ilock(ip, XFS_IOLOCK_EXCL);
399
400 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
401 (!(ioflags & IO_INVIS))) {
402 vrwlock_t locktype = VRWLOCK_WRITE;
403 int error;
404
405 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
406 outfilp->f_pos, count,
407 FILP_DELAY_FLAG(outfilp), &locktype);
408 if (error) {
409 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
410 return -error;
411 }
412 }
413 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
414 pipe, count, outfilp->f_pos, ioflags);
415 ret = generic_file_splice_write(pipe, outfilp, count, flags);
416 if (ret > 0)
417 XFS_STATS_ADD(xs_write_bytes, ret);
418
419 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
349 return ret; 420 return ret;
350} 421}
351 422
@@ -363,7 +434,7 @@ xfs_zero_last_block(
363 xfs_fsize_t end_size) 434 xfs_fsize_t end_size)
364{ 435{
365 xfs_fileoff_t last_fsb; 436 xfs_fileoff_t last_fsb;
366 xfs_mount_t *mp; 437 xfs_mount_t *mp = io->io_mount;
367 int nimaps; 438 int nimaps;
368 int zero_offset; 439 int zero_offset;
369 int zero_len; 440 int zero_len;
@@ -373,8 +444,6 @@ xfs_zero_last_block(
373 444
374 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); 445 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
375 446
376 mp = io->io_mount;
377
378 zero_offset = XFS_B_FSB_OFFSET(mp, isize); 447 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
379 if (zero_offset == 0) { 448 if (zero_offset == 0) {
380 /* 449 /*
@@ -405,10 +474,9 @@ xfs_zero_last_block(
405 * don't deadlock when the buffer cache calls back to us. 474 * don't deadlock when the buffer cache calls back to us.
406 */ 475 */
407 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 476 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
408 loff = XFS_FSB_TO_B(mp, last_fsb);
409 477
478 loff = XFS_FSB_TO_B(mp, last_fsb);
410 zero_len = mp->m_sb.sb_blocksize - zero_offset; 479 zero_len = mp->m_sb.sb_blocksize - zero_offset;
411
412 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); 480 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
413 481
414 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 482 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
@@ -441,7 +509,7 @@ xfs_zero_eof(
441 xfs_fileoff_t zero_count_fsb; 509 xfs_fileoff_t zero_count_fsb;
442 xfs_fileoff_t last_fsb; 510 xfs_fileoff_t last_fsb;
443 xfs_extlen_t buf_len_fsb; 511 xfs_extlen_t buf_len_fsb;
444 xfs_mount_t *mp; 512 xfs_mount_t *mp = io->io_mount;
445 int nimaps; 513 int nimaps;
446 int error = 0; 514 int error = 0;
447 xfs_bmbt_irec_t imap; 515 xfs_bmbt_irec_t imap;
@@ -450,8 +518,6 @@ xfs_zero_eof(
450 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 518 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
451 ASSERT(offset > isize); 519 ASSERT(offset > isize);
452 520
453 mp = io->io_mount;
454
455 /* 521 /*
456 * First handle zeroing the block on which isize resides. 522 * First handle zeroing the block on which isize resides.
457 * We only zero a part of that block so it is handled specially. 523 * We only zero a part of that block so it is handled specially.
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 38864a88d42d..55c689a86ad2 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -60,6 +60,8 @@ struct xfs_iomap;
60#define XFS_IOMAP_ALLOC_ENTER 25 60#define XFS_IOMAP_ALLOC_ENTER 25
61#define XFS_IOMAP_ALLOC_MAP 26 61#define XFS_IOMAP_ALLOC_MAP 26
62#define XFS_IOMAP_UNWRITTEN 27 62#define XFS_IOMAP_UNWRITTEN 27
63#define XFS_SPLICE_READ_ENTER 28
64#define XFS_SPLICE_WRITE_ENTER 29
63extern void xfs_rw_enter_trace(int, struct xfs_iocore *, 65extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
64 void *, size_t, loff_t, int); 66 void *, size_t, loff_t, int);
65extern void xfs_inval_cached_trace(struct xfs_iocore *, 67extern void xfs_inval_cached_trace(struct xfs_iocore *,
@@ -78,6 +80,7 @@ extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int,
78 struct xfs_iomap *, int *); 80 struct xfs_iomap *, int *);
79extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 81extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
80extern int xfs_bdstrat_cb(struct xfs_buf *); 82extern int xfs_bdstrat_cb(struct xfs_buf *);
83extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
81 84
82extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, 85extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
83 xfs_fsize_t, xfs_fsize_t); 86 xfs_fsize_t, xfs_fsize_t);
@@ -90,7 +93,11 @@ extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
90extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, 93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
91 loff_t *, int, size_t, read_actor_t, 94 loff_t *, int, size_t, read_actor_t,
92 void *, struct cred *); 95 void *, struct cred *);
93 96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
94extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 97 struct pipe_inode_info *, size_t, int, int,
98 struct cred *);
99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
100 struct file *, size_t, int, int,
101 struct cred *);
95 102
96#endif /* __XFS_LRW_H__ */ 103#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1884300417e3..68f4793e8a11 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -67,7 +67,8 @@ mempool_t *xfs_ioend_pool;
67 67
68STATIC struct xfs_mount_args * 68STATIC struct xfs_mount_args *
69xfs_args_allocate( 69xfs_args_allocate(
70 struct super_block *sb) 70 struct super_block *sb,
71 int silent)
71{ 72{
72 struct xfs_mount_args *args; 73 struct xfs_mount_args *args;
73 74
@@ -80,8 +81,8 @@ xfs_args_allocate(
80 args->flags |= XFSMNT_DIRSYNC; 81 args->flags |= XFSMNT_DIRSYNC;
81 if (sb->s_flags & MS_SYNCHRONOUS) 82 if (sb->s_flags & MS_SYNCHRONOUS)
82 args->flags |= XFSMNT_WSYNC; 83 args->flags |= XFSMNT_WSYNC;
83 84 if (silent)
84 /* Default to 32 bit inodes on Linux all the time */ 85 args->flags |= XFSMNT_QUIET;
85 args->flags |= XFSMNT_32BITINODES; 86 args->flags |= XFSMNT_32BITINODES;
86 87
87 return args; 88 return args;
@@ -719,7 +720,7 @@ xfs_fs_remount(
719 char *options) 720 char *options)
720{ 721{
721 vfs_t *vfsp = vfs_from_sb(sb); 722 vfs_t *vfsp = vfs_from_sb(sb);
722 struct xfs_mount_args *args = xfs_args_allocate(sb); 723 struct xfs_mount_args *args = xfs_args_allocate(sb, 0);
723 int error; 724 int error;
724 725
725 VFS_PARSEARGS(vfsp, options, args, 1, error); 726 VFS_PARSEARGS(vfsp, options, args, 1, error);
@@ -825,7 +826,7 @@ xfs_fs_fill_super(
825{ 826{
826 vnode_t *rootvp; 827 vnode_t *rootvp;
827 struct vfs *vfsp = vfs_allocate(sb); 828 struct vfs *vfsp = vfs_allocate(sb);
828 struct xfs_mount_args *args = xfs_args_allocate(sb); 829 struct xfs_mount_args *args = xfs_args_allocate(sb, silent);
829 struct kstatfs statvfs; 830 struct kstatfs statvfs;
830 int error, error2; 831 int error, error2;
831 832
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 06f5845e9568..88b09f186289 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -173,6 +173,12 @@ typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
173typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, 173typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
174 loff_t *, int, size_t, read_actor_t, 174 loff_t *, int, size_t, read_actor_t,
175 void *, struct cred *); 175 void *, struct cred *);
176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
177 struct pipe_inode_info *, size_t, int, int,
178 struct cred *);
179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
180 struct file *, size_t, int, int,
181 struct cred *);
176typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
177 int, unsigned int, void __user *); 183 int, unsigned int, void __user *);
178typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, 184typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
@@ -231,6 +237,8 @@ typedef struct vnodeops {
231 vop_read_t vop_read; 237 vop_read_t vop_read;
232 vop_write_t vop_write; 238 vop_write_t vop_write;
233 vop_sendfile_t vop_sendfile; 239 vop_sendfile_t vop_sendfile;
240 vop_splice_read_t vop_splice_read;
241 vop_splice_write_t vop_splice_write;
234 vop_ioctl_t vop_ioctl; 242 vop_ioctl_t vop_ioctl;
235 vop_getattr_t vop_getattr; 243 vop_getattr_t vop_getattr;
236 vop_setattr_t vop_setattr; 244 vop_setattr_t vop_setattr;
@@ -276,6 +284,10 @@ typedef struct vnodeops {
276 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) 284 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
277#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ 285#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \
278 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) 286 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
287#define VOP_SPLICE_READ(vp,f,pipe,cnt,fl,iofl,cr,rv) \
288 rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr)
289#define VOP_SPLICE_WRITE(vp,f,pipe,cnt,fl,iofl,cr,rv) \
290 rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr)
279#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ 291#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
280 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) 292 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
281#define VOP_OPEN(vp, cr, rv) \ 293#define VOP_OPEN(vp, cr, rv) \
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 73c1e5e80c07..7fb5eca9bd50 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -2624,7 +2624,7 @@ xfs_qm_vop_chown_reserve(
2624{ 2624{
2625 int error; 2625 int error;
2626 xfs_mount_t *mp; 2626 xfs_mount_t *mp;
2627 uint delblks, blkflags; 2627 uint delblks, blkflags, prjflags = 0;
2628 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 2628 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2629 2629
2630 ASSERT(XFS_ISLOCKED_INODE(ip)); 2630 ASSERT(XFS_ISLOCKED_INODE(ip));
@@ -2650,10 +2650,13 @@ xfs_qm_vop_chown_reserve(
2650 } 2650 }
2651 } 2651 }
2652 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { 2652 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2653 if ((XFS_IS_GQUOTA_ON(ip->i_mount) && 2653 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2654 ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) || 2654 ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
2655 (XFS_IS_PQUOTA_ON(ip->i_mount) && 2655 prjflags = XFS_QMOPT_ENOSPC;
2656 ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))) { 2656
2657 if (prjflags ||
2658 (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2659 ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2657 delblksgdq = gdqp; 2660 delblksgdq = gdqp;
2658 if (delblks) { 2661 if (delblks) {
2659 ASSERT(ip->i_gdquot); 2662 ASSERT(ip->i_gdquot);
@@ -2664,7 +2667,7 @@ xfs_qm_vop_chown_reserve(
2664 2667
2665 if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, 2668 if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2666 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, 2669 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2667 flags | blkflags))) 2670 flags | blkflags | prjflags)))
2668 return (error); 2671 return (error);
2669 2672
2670 /* 2673 /*
@@ -2681,7 +2684,7 @@ xfs_qm_vop_chown_reserve(
2681 ASSERT(unresudq || unresgdq); 2684 ASSERT(unresudq || unresgdq);
2682 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, 2685 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2683 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, 2686 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2684 flags | blkflags))) 2687 flags | blkflags | prjflags)))
2685 return (error); 2688 return (error);
2686 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, 2689 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2687 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, 2690 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index d8e131ec0aa8..9168918db252 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -595,12 +595,19 @@ xfs_trans_unreserve_and_mod_dquots(
595 } 595 }
596} 596}
597 597
598STATIC int
599xfs_quota_error(uint flags)
600{
601 if (flags & XFS_QMOPT_ENOSPC)
602 return ENOSPC;
603 return EDQUOT;
604}
605
598/* 606/*
599 * This reserves disk blocks and inodes against a dquot. 607 * This reserves disk blocks and inodes against a dquot.
600 * Flags indicate if the dquot is to be locked here and also 608 * Flags indicate if the dquot is to be locked here and also
601 * if the blk reservation is for RT or regular blocks. 609 * if the blk reservation is for RT or regular blocks.
602 * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. 610 * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
603 * Returns EDQUOT if quota is exceeded.
604 */ 611 */
605STATIC int 612STATIC int
606xfs_trans_dqresv( 613xfs_trans_dqresv(
@@ -666,19 +673,15 @@ xfs_trans_dqresv(
666 */ 673 */
667 if (hardlimit > 0ULL && 674 if (hardlimit > 0ULL &&
668 (hardlimit <= nblks + *resbcountp)) { 675 (hardlimit <= nblks + *resbcountp)) {
669 error = EDQUOT; 676 error = xfs_quota_error(flags);
670 goto error_return; 677 goto error_return;
671 } 678 }
672 679
673 if (softlimit > 0ULL && 680 if (softlimit > 0ULL &&
674 (softlimit <= nblks + *resbcountp)) { 681 (softlimit <= nblks + *resbcountp)) {
675 /*
676 * If timer or warnings has expired,
677 * return EDQUOT
678 */
679 if ((timer != 0 && get_seconds() > timer) || 682 if ((timer != 0 && get_seconds() > timer) ||
680 (warns != 0 && warns >= warnlimit)) { 683 (warns != 0 && warns >= warnlimit)) {
681 error = EDQUOT; 684 error = xfs_quota_error(flags);
682 goto error_return; 685 goto error_return;
683 } 686 }
684 } 687 }
@@ -695,16 +698,12 @@ xfs_trans_dqresv(
695 if (!softlimit) 698 if (!softlimit)
696 softlimit = q->qi_isoftlimit; 699 softlimit = q->qi_isoftlimit;
697 if (hardlimit > 0ULL && count >= hardlimit) { 700 if (hardlimit > 0ULL && count >= hardlimit) {
698 error = EDQUOT; 701 error = xfs_quota_error(flags);
699 goto error_return; 702 goto error_return;
700 } else if (softlimit > 0ULL && count >= softlimit) { 703 } else if (softlimit > 0ULL && count >= softlimit) {
701 /*
702 * If timer or warnings has expired,
703 * return EDQUOT
704 */
705 if ((timer != 0 && get_seconds() > timer) || 704 if ((timer != 0 && get_seconds() > timer) ||
706 (warns != 0 && warns >= warnlimit)) { 705 (warns != 0 && warns >= warnlimit)) {
707 error = EDQUOT; 706 error = xfs_quota_error(flags);
708 goto error_return; 707 goto error_return;
709 } 708 }
710 } 709 }
@@ -751,13 +750,14 @@ error_return:
751 750
752 751
753/* 752/*
754 * Given a dquot(s), make disk block and/or inode reservations against them. 753 * Given dquot(s), make disk block and/or inode reservations against them.
755 * The fact that this does the reservation against both the usr and 754 * The fact that this does the reservation against both the usr and
756 * grp quotas is important, because this follows a both-or-nothing 755 * grp/prj quotas is important, because this follows a both-or-nothing
757 * approach. 756 * approach.
758 * 757 *
759 * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked. 758 * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked.
760 * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. 759 * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
760 * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota.
761 * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks 761 * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
762 * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks 762 * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
763 * dquots are unlocked on return, if they were not locked by caller. 763 * dquots are unlocked on return, if they were not locked by caller.
@@ -772,25 +772,27 @@ xfs_trans_reserve_quota_bydquots(
772 long ninos, 772 long ninos,
773 uint flags) 773 uint flags)
774{ 774{
775 int resvd; 775 int resvd = 0, error;
776 776
777 if (! XFS_IS_QUOTA_ON(mp)) 777 if (!XFS_IS_QUOTA_ON(mp))
778 return (0); 778 return 0;
779 779
780 if (tp && tp->t_dqinfo == NULL) 780 if (tp && tp->t_dqinfo == NULL)
781 xfs_trans_alloc_dqinfo(tp); 781 xfs_trans_alloc_dqinfo(tp);
782 782
783 ASSERT(flags & XFS_QMOPT_RESBLK_MASK); 783 ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
784 resvd = 0;
785 784
786 if (udqp) { 785 if (udqp) {
787 if (xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, flags)) 786 error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
788 return (EDQUOT); 787 (flags & ~XFS_QMOPT_ENOSPC));
788 if (error)
789 return error;
789 resvd = 1; 790 resvd = 1;
790 } 791 }
791 792
792 if (gdqp) { 793 if (gdqp) {
793 if (xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags)) { 794 error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
795 if (error) {
794 /* 796 /*
795 * can't do it, so backout previous reservation 797 * can't do it, so backout previous reservation
796 */ 798 */
@@ -799,14 +801,14 @@ xfs_trans_reserve_quota_bydquots(
799 xfs_trans_dqresv(tp, mp, udqp, 801 xfs_trans_dqresv(tp, mp, udqp,
800 -nblks, -ninos, flags); 802 -nblks, -ninos, flags);
801 } 803 }
802 return (EDQUOT); 804 return error;
803 } 805 }
804 } 806 }
805 807
806 /* 808 /*
807 * Didn't change anything critical, so, no need to log 809 * Didn't change anything critical, so, no need to log
808 */ 810 */
809 return (0); 811 return 0;
810} 812}
811 813
812 814
@@ -814,8 +816,6 @@ xfs_trans_reserve_quota_bydquots(
814 * Lock the dquot and change the reservation if we can. 816 * Lock the dquot and change the reservation if we can.
815 * This doesn't change the actual usage, just the reservation. 817 * This doesn't change the actual usage, just the reservation.
816 * The inode sent in is locked. 818 * The inode sent in is locked.
817 *
818 * Returns 0 on success, EDQUOT or other errors otherwise
819 */ 819 */
820STATIC int 820STATIC int
821xfs_trans_reserve_quota_nblks( 821xfs_trans_reserve_quota_nblks(
@@ -824,20 +824,24 @@ xfs_trans_reserve_quota_nblks(
824 xfs_inode_t *ip, 824 xfs_inode_t *ip,
825 long nblks, 825 long nblks,
826 long ninos, 826 long ninos,
827 uint type) 827 uint flags)
828{ 828{
829 int error; 829 int error;
830 830
831 if (!XFS_IS_QUOTA_ON(mp)) 831 if (!XFS_IS_QUOTA_ON(mp))
832 return (0); 832 return 0;
833 if (XFS_IS_PQUOTA_ON(mp))
834 flags |= XFS_QMOPT_ENOSPC;
833 835
834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); 836 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); 837 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
836 838
837 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 839 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 840 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
839 ASSERT((type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_RTBLKS || 841 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
840 (type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_BLKS); 842 XFS_TRANS_DQ_RES_RTBLKS ||
843 (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
844 XFS_TRANS_DQ_RES_BLKS);
841 845
842 /* 846 /*
843 * Reserve nblks against these dquots, with trans as the mediator. 847 * Reserve nblks against these dquots, with trans as the mediator.
@@ -845,8 +849,8 @@ xfs_trans_reserve_quota_nblks(
845 error = xfs_trans_reserve_quota_bydquots(tp, mp, 849 error = xfs_trans_reserve_quota_bydquots(tp, mp,
846 ip->i_udquot, ip->i_gdquot, 850 ip->i_udquot, ip->i_gdquot,
847 nblks, ninos, 851 nblks, ninos,
848 type); 852 flags);
849 return (error); 853 return error;
850} 854}
851 855
852/* 856/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index d384e489705f..26939d364bc4 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4719,18 +4719,17 @@ xfs_bmapi(
4719 /* 4719 /*
4720 * Make a transaction-less quota reservation for 4720 * Make a transaction-less quota reservation for
4721 * delayed allocation blocks. This number gets 4721 * delayed allocation blocks. This number gets
4722 * adjusted later. 4722 * adjusted later. We return if we haven't
4723 * We return EDQUOT if we haven't allocated 4723 * allocated blocks already inside this loop.
4724 * blks already inside this loop;
4725 */ 4724 */
4726 if (XFS_TRANS_RESERVE_QUOTA_NBLKS( 4725 if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS(
4727 mp, NULL, ip, (long)alen, 0, 4726 mp, NULL, ip, (long)alen, 0,
4728 rt ? XFS_QMOPT_RES_RTBLKS : 4727 rt ? XFS_QMOPT_RES_RTBLKS :
4729 XFS_QMOPT_RES_REGBLKS)) { 4728 XFS_QMOPT_RES_REGBLKS))) {
4730 if (n == 0) { 4729 if (n == 0) {
4731 *nmap = 0; 4730 *nmap = 0;
4732 ASSERT(cur == NULL); 4731 ASSERT(cur == NULL);
4733 return XFS_ERROR(EDQUOT); 4732 return error;
4734 } 4733 }
4735 break; 4734 break;
4736 } 4735 }
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index f83399c89ce3..8e0d73d9ccc4 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -353,10 +353,11 @@ xfs_check_nostate_extents(
353 xfs_extnum_t num); 353 xfs_extnum_t num);
354 354
355/* 355/*
356 * Call xfs_bmap_do_search_extents() to search for the extent 356 * Search the extent records for the entry containing block bno.
357 * record containing block bno. If in multi-level in-core extent 357 * If bno lies in a hole, point to the next entry. If bno lies
358 * allocation mode, find and extract the target extent buffer, 358 * past eof, *eofp will be set, and *prevp will contain the last
359 * otherwise just use the direct extent list. 359 * entry (null if none). Else, *lastxp will be set to the index
360 * of the found entry; *gotp will contain the entry.
360 */ 361 */
361xfs_bmbt_rec_t * 362xfs_bmbt_rec_t *
362xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, 363xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 022fff62085b..5b7eb81453be 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -68,6 +68,7 @@ struct xfs_mount_args {
68 * enforcement */ 68 * enforcement */
69#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit 69#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit
70 * enforcement */ 70 * enforcement */
71#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */
71#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at 72#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at
72 * stripe boundaries*/ 73 * stripe boundaries*/
73#define XFSMNT_RETERR 0x00000400 /* return error to user */ 74#define XFSMNT_RETERR 0x00000400 /* return error to user */
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 26b8e709a569..bc43163456ef 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -186,4 +186,7 @@ extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
186#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ 186#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
187 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) 187 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
188 188
189#define xfs_fs_mount_cmn_err(f, fmt, args...) \
190 (((f) & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args)) /* warn unless the mount flags request quiet */
191
189#endif /* __XFS_ERROR_H__ */ 192#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 4eeb856183b1..deddbd03c166 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -158,9 +158,10 @@ xfs_ialloc_ag_alloc(
158 */ 158 */
159 agi = XFS_BUF_TO_AGI(agbp); 159 agi = XFS_BUF_TO_AGI(agbp);
160 newino = be32_to_cpu(agi->agi_newino); 160 newino = be32_to_cpu(agi->agi_newino);
161 if(likely(newino != NULLAGINO)) { 161 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
162 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 162 XFS_IALLOC_BLOCKS(args.mp);
163 XFS_IALLOC_BLOCKS(args.mp); 163 if (likely(newino != NULLAGINO &&
164 (args.agbno < be32_to_cpu(agi->agi_length)))) {
164 args.fsbno = XFS_AGB_TO_FSB(args.mp, 165 args.fsbno = XFS_AGB_TO_FSB(args.mp,
165 be32_to_cpu(agi->agi_seqno), args.agbno); 166 be32_to_cpu(agi->agi_seqno), args.agbno);
166 args.type = XFS_ALLOCTYPE_THIS_BNO; 167 args.type = XFS_ALLOCTYPE_THIS_BNO;
@@ -182,8 +183,8 @@ xfs_ialloc_ag_alloc(
182 * Set the alignment for the allocation. 183 * Set the alignment for the allocation.
183 * If stripe alignment is turned on then align at stripe unit 184 * If stripe alignment is turned on then align at stripe unit
184 * boundary. 185 * boundary.
185 * If the cluster size is smaller than a filesystem block 186 * If the cluster size is smaller than a filesystem block
186 * then we're doing I/O for inodes in filesystem block size 187 * then we're doing I/O for inodes in filesystem block size
187 * pieces, so don't need alignment anyway. 188 * pieces, so don't need alignment anyway.
188 */ 189 */
189 isaligned = 0; 190 isaligned = 0;
@@ -192,7 +193,7 @@ xfs_ialloc_ag_alloc(
192 args.alignment = args.mp->m_dalign; 193 args.alignment = args.mp->m_dalign;
193 isaligned = 1; 194 isaligned = 1;
194 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 195 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
195 args.mp->m_sb.sb_inoalignmt >= 196 args.mp->m_sb.sb_inoalignmt >=
196 XFS_B_TO_FSBT(args.mp, 197 XFS_B_TO_FSBT(args.mp,
197 XFS_INODE_CLUSTER_SIZE(args.mp))) 198 XFS_INODE_CLUSTER_SIZE(args.mp)))
198 args.alignment = args.mp->m_sb.sb_inoalignmt; 199 args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -220,7 +221,7 @@ xfs_ialloc_ag_alloc(
220 if ((error = xfs_alloc_vextent(&args))) 221 if ((error = xfs_alloc_vextent(&args)))
221 return error; 222 return error;
222 } 223 }
223 224
224 /* 225 /*
225 * If stripe alignment is turned on, then try again with cluster 226 * If stripe alignment is turned on, then try again with cluster
226 * alignment. 227 * alignment.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index bb33113eef9f..b53854325266 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -421,7 +421,10 @@ finish_inode:
421 ip->i_chash = chlnew; 421 ip->i_chash = chlnew;
422 chlnew->chl_ip = ip; 422 chlnew->chl_ip = ip;
423 chlnew->chl_blkno = ip->i_blkno; 423 chlnew->chl_blkno = ip->i_blkno;
424 if (ch->ch_list)
425 ch->ch_list->chl_prev = chlnew;
424 chlnew->chl_next = ch->ch_list; 426 chlnew->chl_next = ch->ch_list;
427 chlnew->chl_prev = NULL;
425 ch->ch_list = chlnew; 428 ch->ch_list = chlnew;
426 chlnew = NULL; 429 chlnew = NULL;
427 } 430 }
@@ -723,23 +726,15 @@ xfs_iextract(
723 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); 726 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
724 ASSERT(ip->i_chash != NULL); 727 ASSERT(ip->i_chash != NULL);
725 chm=NULL; 728 chm=NULL;
726 for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { 729 chl = ip->i_chash;
727 if (chl->chl_blkno == ip->i_blkno) { 730 if (chl->chl_prev)
728 if (chm == NULL) { 731 chl->chl_prev->chl_next = chl->chl_next;
729 /* first item on the list */ 732 else
730 ch->ch_list = chl->chl_next; 733 ch->ch_list = chl->chl_next;
731 } else { 734 if (chl->chl_next)
732 chm->chl_next = chl->chl_next; 735 chl->chl_next->chl_prev = chl->chl_prev;
733 } 736 kmem_zone_free(xfs_chashlist_zone, chl);
734 kmem_zone_free(xfs_chashlist_zone, chl); 737 } else {
735 break;
736 } else {
737 ASSERT(chl->chl_ip != ip);
738 chm = chl;
739 }
740 }
741 ASSERT_ALWAYS(chl != NULL);
742 } else {
743 /* delete one inode from a non-empty list */ 738 /* delete one inode from a non-empty list */
744 iq = ip->i_cnext; 739 iq = ip->i_cnext;
745 iq->i_cprev = ip->i_cprev; 740 iq->i_cprev = ip->i_cprev;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 48146bdc6bdd..94b60dd03801 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2732,16 +2732,29 @@ xfs_iunpin(
2732 ASSERT(atomic_read(&ip->i_pincount) > 0); 2732 ASSERT(atomic_read(&ip->i_pincount) > 0);
2733 2733
2734 if (atomic_dec_and_test(&ip->i_pincount)) { 2734 if (atomic_dec_and_test(&ip->i_pincount)) {
2735 vnode_t *vp = XFS_ITOV_NULL(ip); 2735 /*
2736 * If the inode is currently being reclaimed, the
2737 * linux inode _and_ the xfs vnode may have been
2738 * freed so we cannot reference either of them safely.
2739 * Hence we should not try to do anything to them
2740 * if the xfs inode is currently in the reclaim
2741 * path.
2742 *
2743 * However, we still need to issue the unpin wakeup
2744 * call as the inode reclaim may be blocked waiting for
2745 * the inode to become unpinned.
2746 */
2747 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2748 vnode_t *vp = XFS_ITOV_NULL(ip);
2736 2749
2737 /* make sync come back and flush this inode */ 2750 /* make sync come back and flush this inode */
2738 if (vp) { 2751 if (vp) {
2739 struct inode *inode = vn_to_inode(vp); 2752 struct inode *inode = vn_to_inode(vp);
2740 2753
2741 if (!(inode->i_state & I_NEW)) 2754 if (!(inode->i_state & I_NEW))
2742 mark_inode_dirty_sync(inode); 2755 mark_inode_dirty_sync(inode);
2756 }
2743 } 2757 }
2744
2745 wake_up(&ip->i_ipin_wait); 2758 wake_up(&ip->i_ipin_wait);
2746 } 2759 }
2747} 2760}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39ef9c36ea55..3b544db1790b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -189,6 +189,7 @@ typedef struct xfs_ihash {
189 */ 189 */
190typedef struct xfs_chashlist { 190typedef struct xfs_chashlist {
191 struct xfs_chashlist *chl_next; 191 struct xfs_chashlist *chl_next;
192 struct xfs_chashlist *chl_prev;
192 struct xfs_inode *chl_ip; 193 struct xfs_inode *chl_ip;
193 xfs_daddr_t chl_blkno; /* starting block number of 194 xfs_daddr_t chl_blkno; /* starting block number of
194 * the cluster */ 195 * the cluster */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 72e7e78bfff8..c0b1c2906880 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -213,7 +213,8 @@ xfs_mount_free(
213STATIC int 213STATIC int
214xfs_mount_validate_sb( 214xfs_mount_validate_sb(
215 xfs_mount_t *mp, 215 xfs_mount_t *mp,
216 xfs_sb_t *sbp) 216 xfs_sb_t *sbp,
217 int flags)
217{ 218{
218 /* 219 /*
219 * If the log device and data device have the 220 * If the log device and data device have the
@@ -223,33 +224,29 @@ xfs_mount_validate_sb(
223 * a volume filesystem in a non-volume manner. 224 * a volume filesystem in a non-volume manner.
224 */ 225 */
225 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 226 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
226 cmn_err(CE_WARN, "XFS: bad magic number"); 227 xfs_fs_mount_cmn_err(flags, "bad magic number");
227 return XFS_ERROR(EWRONGFS); 228 return XFS_ERROR(EWRONGFS);
228 } 229 }
229 230
230 if (!XFS_SB_GOOD_VERSION(sbp)) { 231 if (!XFS_SB_GOOD_VERSION(sbp)) {
231 cmn_err(CE_WARN, "XFS: bad version"); 232 xfs_fs_mount_cmn_err(flags, "bad version");
232 return XFS_ERROR(EWRONGFS); 233 return XFS_ERROR(EWRONGFS);
233 } 234 }
234 235
235 if (unlikely( 236 if (unlikely(
236 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 237 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
237 cmn_err(CE_WARN, 238 xfs_fs_mount_cmn_err(flags,
238 "XFS: filesystem is marked as having an external log; " 239 "filesystem is marked as having an external log; "
239 "specify logdev on the\nmount command line."); 240 "specify logdev on the\nmount command line.");
240 XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)", 241 return XFS_ERROR(EINVAL);
241 XFS_ERRLEVEL_HIGH, mp, sbp);
242 return XFS_ERROR(EFSCORRUPTED);
243 } 242 }
244 243
245 if (unlikely( 244 if (unlikely(
246 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 245 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
247 cmn_err(CE_WARN, 246 xfs_fs_mount_cmn_err(flags,
248 "XFS: filesystem is marked as having an internal log; " 247 "filesystem is marked as having an internal log; "
249 "don't specify logdev on\nthe mount command line."); 248 "do not specify logdev on\nthe mount command line.");
250 XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)", 249 return XFS_ERROR(EINVAL);
251 XFS_ERRLEVEL_HIGH, mp, sbp);
252 return XFS_ERROR(EFSCORRUPTED);
253 } 250 }
254 251
255 /* 252 /*
@@ -273,10 +270,8 @@ xfs_mount_validate_sb(
273 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
274 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
275 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
276 (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) { 273 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
277 cmn_err(CE_WARN, "XFS: SB sanity check 1 failed"); 274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
278 XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
279 XFS_ERRLEVEL_LOW, mp, sbp);
280 return XFS_ERROR(EFSCORRUPTED); 275 return XFS_ERROR(EFSCORRUPTED);
281 } 276 }
282 277
@@ -289,9 +284,7 @@ xfs_mount_validate_sb(
289 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || 284 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
290 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * 285 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
291 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { 286 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
292 cmn_err(CE_WARN, "XFS: SB sanity check 2 failed"); 287 xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
293 XFS_ERROR_REPORT("xfs_mount_validate_sb(4)",
294 XFS_ERRLEVEL_LOW, mp);
295 return XFS_ERROR(EFSCORRUPTED); 288 return XFS_ERROR(EFSCORRUPTED);
296 } 289 }
297 290
@@ -307,15 +300,13 @@ xfs_mount_validate_sb(
307 (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX || 300 (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
308 (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) { 301 (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
309#endif 302#endif
310 cmn_err(CE_WARN, 303 xfs_fs_mount_cmn_err(flags,
311 "XFS: File system is too large to be mounted on this system."); 304 "file system too large to be mounted on this system.");
312 return XFS_ERROR(E2BIG); 305 return XFS_ERROR(E2BIG);
313 } 306 }
314 307
315 if (unlikely(sbp->sb_inprogress)) { 308 if (unlikely(sbp->sb_inprogress)) {
316 cmn_err(CE_WARN, "XFS: file system busy"); 309 xfs_fs_mount_cmn_err(flags, "file system busy");
317 XFS_ERROR_REPORT("xfs_mount_validate_sb(5)",
318 XFS_ERRLEVEL_LOW, mp);
319 return XFS_ERROR(EFSCORRUPTED); 310 return XFS_ERROR(EFSCORRUPTED);
320 } 311 }
321 312
@@ -323,8 +314,8 @@ xfs_mount_validate_sb(
323 * Version 1 directory format has never worked on Linux. 314 * Version 1 directory format has never worked on Linux.
324 */ 315 */
325 if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { 316 if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
326 cmn_err(CE_WARN, 317 xfs_fs_mount_cmn_err(flags,
327 "XFS: Attempted to mount file system using version 1 directory format"); 318 "file system using version 1 directory format");
328 return XFS_ERROR(ENOSYS); 319 return XFS_ERROR(ENOSYS);
329 } 320 }
330 321
@@ -332,11 +323,11 @@ xfs_mount_validate_sb(
332 * Until this is fixed only page-sized or smaller data blocks work. 323 * Until this is fixed only page-sized or smaller data blocks work.
333 */ 324 */
334 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { 325 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
335 cmn_err(CE_WARN, 326 xfs_fs_mount_cmn_err(flags,
336 "XFS: Attempted to mount file system with blocksize %d bytes", 327 "file system with blocksize %d bytes",
337 sbp->sb_blocksize); 328 sbp->sb_blocksize);
338 cmn_err(CE_WARN, 329 xfs_fs_mount_cmn_err(flags,
339 "XFS: Only page-sized (%ld) or less blocksizes currently work.", 330 "only pagesize (%ld) or less will currently work.",
340 PAGE_SIZE); 331 PAGE_SIZE);
341 return XFS_ERROR(ENOSYS); 332 return XFS_ERROR(ENOSYS);
342 } 333 }
@@ -484,7 +475,7 @@ xfs_xlatesb(
484 * Does the initial read of the superblock. 475 * Does the initial read of the superblock.
485 */ 476 */
486int 477int
487xfs_readsb(xfs_mount_t *mp) 478xfs_readsb(xfs_mount_t *mp, int flags)
488{ 479{
489 unsigned int sector_size; 480 unsigned int sector_size;
490 unsigned int extra_flags; 481 unsigned int extra_flags;
@@ -506,7 +497,7 @@ xfs_readsb(xfs_mount_t *mp)
506 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 497 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
507 BTOBB(sector_size), extra_flags); 498 BTOBB(sector_size), extra_flags);
508 if (!bp || XFS_BUF_ISERROR(bp)) { 499 if (!bp || XFS_BUF_ISERROR(bp)) {
509 cmn_err(CE_WARN, "XFS: SB read failed"); 500 xfs_fs_mount_cmn_err(flags, "SB read failed");
510 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 501 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
511 goto fail; 502 goto fail;
512 } 503 }
@@ -520,9 +511,9 @@ xfs_readsb(xfs_mount_t *mp)
520 sbp = XFS_BUF_TO_SBP(bp); 511 sbp = XFS_BUF_TO_SBP(bp);
521 xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS); 512 xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
522 513
523 error = xfs_mount_validate_sb(mp, &(mp->m_sb)); 514 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
524 if (error) { 515 if (error) {
525 cmn_err(CE_WARN, "XFS: SB validate failed"); 516 xfs_fs_mount_cmn_err(flags, "SB validate failed");
526 goto fail; 517 goto fail;
527 } 518 }
528 519
@@ -530,8 +521,8 @@ xfs_readsb(xfs_mount_t *mp)
530 * We must be able to do sector-sized and sector-aligned IO. 521 * We must be able to do sector-sized and sector-aligned IO.
531 */ 522 */
532 if (sector_size > mp->m_sb.sb_sectsize) { 523 if (sector_size > mp->m_sb.sb_sectsize) {
533 cmn_err(CE_WARN, 524 xfs_fs_mount_cmn_err(flags,
534 "XFS: device supports only %u byte sectors (not %u)", 525 "device supports only %u byte sectors (not %u)",
535 sector_size, mp->m_sb.sb_sectsize); 526 sector_size, mp->m_sb.sb_sectsize);
536 error = ENOSYS; 527 error = ENOSYS;
537 goto fail; 528 goto fail;
@@ -548,7 +539,7 @@ xfs_readsb(xfs_mount_t *mp)
548 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 539 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
549 BTOBB(sector_size), extra_flags); 540 BTOBB(sector_size), extra_flags);
550 if (!bp || XFS_BUF_ISERROR(bp)) { 541 if (!bp || XFS_BUF_ISERROR(bp)) {
551 cmn_err(CE_WARN, "XFS: SB re-read failed"); 542 xfs_fs_mount_cmn_err(flags, "SB re-read failed");
552 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 543 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
553 goto fail; 544 goto fail;
554 } 545 }
@@ -678,7 +669,7 @@ xfs_mountfs(
678 int error = 0; 669 int error = 0;
679 670
680 if (mp->m_sb_bp == NULL) { 671 if (mp->m_sb_bp == NULL) {
681 if ((error = xfs_readsb(mp))) { 672 if ((error = xfs_readsb(mp, mfsi_flags))) {
682 return error; 673 return error;
683 } 674 }
684 } 675 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 66cbee79864e..668ad23fd37c 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -510,9 +510,12 @@ xfs_preferred_iosize(xfs_mount_t *mp)
510 */ 510 */
511#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */ 511#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */
512#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */ 512#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */
513/* XFS_MFSI_RRINODES */
513#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */ 514#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */
514 /* log recovery */ 515 /* log recovery */
515#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */ 516#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */
517/* XFS_MFSI_CONVERT_SUNIT */
518#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */
516 519
517/* 520/*
518 * Macros for getting from mount to vfs and back. 521 * Macros for getting from mount to vfs and back.
@@ -581,7 +584,7 @@ extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
581extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 584extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
582 uint, int); 585 uint, int);
583extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 586extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
584extern int xfs_readsb(xfs_mount_t *mp); 587extern int xfs_readsb(xfs_mount_t *, int);
585extern void xfs_freesb(xfs_mount_t *); 588extern void xfs_freesb(xfs_mount_t *);
586extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); 589extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
587extern int xfs_syncsub(xfs_mount_t *, int, int, int *); 590extern int xfs_syncsub(xfs_mount_t *, int, int, int *);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 4f6a034de7f7..7fbef974bce6 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -196,10 +196,11 @@ typedef struct xfs_qoff_logformat {
196#define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */ 196#define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */
197#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ 197#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
198#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ 198#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
199#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if necessary */ 199#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
200#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ 200#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */
201#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot, if damaged. */ 201#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
202#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 202#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
203#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
203 204
204/* 205/*
205 * flags to xfs_trans_mod_dquot to indicate which field needs to be 206 * flags to xfs_trans_mod_dquot to indicate which field needs to be
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 504d2a80747a..f0e09ca14139 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -442,6 +442,9 @@ xfs_mount(
442 p = vfs_bhv_lookup(vfsp, VFS_POSITION_IO); 442 p = vfs_bhv_lookup(vfsp, VFS_POSITION_IO);
443 mp->m_io_ops = p ? *(xfs_ioops_t *) vfs_bhv_custom(p) : xfs_iocore_xfs; 443 mp->m_io_ops = p ? *(xfs_ioops_t *) vfs_bhv_custom(p) : xfs_iocore_xfs;
444 444
445 if (args->flags & XFSMNT_QUIET)
446 flags |= XFS_MFSI_QUIET;
447
445 /* 448 /*
446 * Open real time and log devices - order is important. 449 * Open real time and log devices - order is important.
447 */ 450 */
@@ -492,7 +495,7 @@ xfs_mount(
492 error = xfs_start_flags(vfsp, args, mp); 495 error = xfs_start_flags(vfsp, args, mp);
493 if (error) 496 if (error)
494 goto error1; 497 goto error1;
495 error = xfs_readsb(mp); 498 error = xfs_readsb(mp, flags);
496 if (error) 499 if (error)
497 goto error1; 500 goto error1;
498 error = xfs_finish_flags(vfsp, args, mp); 501 error = xfs_finish_flags(vfsp, args, mp);
@@ -1697,8 +1700,9 @@ xfs_parseargs(
1697 int dsunit, dswidth, vol_dsunit, vol_dswidth; 1700 int dsunit, dswidth, vol_dsunit, vol_dswidth;
1698 int iosize; 1701 int iosize;
1699 1702
1700 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1701 args->flags |= XFSMNT_IDELETE; 1703 args->flags |= XFSMNT_IDELETE;
1704 args->flags |= XFSMNT_BARRIER;
1705 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1702 1706
1703 if (!options) 1707 if (!options)
1704 goto done; 1708 goto done;
@@ -1947,8 +1951,6 @@ xfs_showargs(
1947 seq_printf(m, "," MNTOPT_IKEEP); 1951 seq_printf(m, "," MNTOPT_IKEEP);
1948 if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) 1952 if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
1949 seq_printf(m, "," MNTOPT_LARGEIO); 1953 seq_printf(m, "," MNTOPT_LARGEIO);
1950 if (mp->m_flags & XFS_MOUNT_BARRIER)
1951 seq_printf(m, "," MNTOPT_BARRIER);
1952 1954
1953 if (!(vfsp->vfs_flag & VFS_32BITINODES)) 1955 if (!(vfsp->vfs_flag & VFS_32BITINODES))
1954 seq_printf(m, "," MNTOPT_64BITINODE); 1956 seq_printf(m, "," MNTOPT_64BITINODE);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de49601919c1..fa71b305ba5c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4649,6 +4649,10 @@ vnodeops_t xfs_vnodeops = {
4649#ifdef HAVE_SENDFILE 4649#ifdef HAVE_SENDFILE
4650 .vop_sendfile = xfs_sendfile, 4650 .vop_sendfile = xfs_sendfile,
4651#endif 4651#endif
4652#ifdef HAVE_SPLICE
4653 .vop_splice_read = xfs_splice_read,
4654 .vop_splice_write = xfs_splice_write,
4655#endif
4652 .vop_write = xfs_write, 4656 .vop_write = xfs_write,
4653 .vop_ioctl = xfs_ioctl, 4657 .vop_ioctl = xfs_ioctl,
4654 .vop_getattr = xfs_getattr, 4658 .vop_getattr = xfs_getattr,