author	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-21 00:21:46 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-21 00:21:46 -0400
commit	14b395e35d1afdd8019d11b92e28041fad591b71 (patch)
tree	cff7ba9bed7a38300b19a5bacc632979d64fd9c8
parent	734b397cd14f3340394a8dd3266bec97d01f034b (diff)
parent	5108b27651727b5aba0826e8fd7be71b42428701 (diff)
Merge branch 'for-2.6.27' of git://linux-nfs.org/~bfields/linux
* 'for-2.6.27' of git://linux-nfs.org/~bfields/linux: (51 commits)
  nfsd: nfs4xdr.c do-while is not a compound statement
  nfsd: Use C99 initializers in fs/nfsd/nfs4xdr.c
  lockd: Pass "struct sockaddr *" to new failover-by-IP function
  lockd: get host reference in nlmsvc_create_block() instead of callers
  lockd: minor svclock.c style fixes
  lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_lock
  lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_testlock
  lockd: nlm_release_host() checks for NULL, caller needn't
  file lock: reorder struct file_lock to save space on 64 bit builds
  nfsd: take file and mnt write in nfs4_upgrade_open
  nfsd: document open share bit tracking
  nfsd: tabulate nfs4 xdr encoding functions
  nfsd: dprint operation names
  svcrdma: Change WR context get/put to use the kmem cache
  svcrdma: Create a kmem cache for the WR contexts
  svcrdma: Add flush_scheduled_work to module exit function
  svcrdma: Limit ORD based on client's advertised IRD
  svcrdma: Remove unused wait q from svcrdma_xprt structure
  svcrdma: Remove unneeded spin locks from __svc_rdma_free
  svcrdma: Add dma map count and WARN_ON
  ...
-rw-r--r--  Documentation/filesystems/nfs-rdma.txt   | 103
-rw-r--r--  fs/lockd/svc.c                           |  33
-rw-r--r--  fs/lockd/svc4proc.c                      |   7
-rw-r--r--  fs/lockd/svclock.c                       |  33
-rw-r--r--  fs/lockd/svcproc.c                       |   7
-rw-r--r--  fs/lockd/svcsubs.c                       |  32
-rw-r--r--  fs/nfsd/lockd.c                          |   2
-rw-r--r--  fs/nfsd/nfs2acl.c                        |   7
-rw-r--r--  fs/nfsd/nfs3acl.c                        |   5
-rw-r--r--  fs/nfsd/nfs3proc.c                       |   8
-rw-r--r--  fs/nfsd/nfs4proc.c                       |  76
-rw-r--r--  fs/nfsd/nfs4state.c                      |  49
-rw-r--r--  fs/nfsd/nfs4xdr.c                        | 392
-rw-r--r--  fs/nfsd/nfsctl.c                         | 118
-rw-r--r--  fs/nfsd/nfsfh.c                          |  31
-rw-r--r--  fs/nfsd/nfsproc.c                        |   9
-rw-r--r--  fs/nfsd/nfssvc.c                         | 148
-rw-r--r--  fs/nfsd/vfs.c                            | 140
-rw-r--r--  include/linux/fs.h                       |   4
-rw-r--r--  include/linux/lockd/lockd.h              |   8
-rw-r--r--  include/linux/nfs4.h                     |   3
-rw-r--r--  include/linux/nfsd/nfsd.h                |  27
-rw-r--r--  include/linux/nfsd/state.h               |   2
-rw-r--r--  include/linux/sunrpc/gss_krb5.h          |   3
-rw-r--r--  include/linux/sunrpc/svc.h               |   7
-rw-r--r--  include/linux/sunrpc/svc_rdma.h          |  36
-rw-r--r--  net/sunrpc/auth_gss/Makefile             |   4
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_crypto.c    |  10
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seal.c      |  26
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_unseal.c    |  16
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_wrap.c      |  72
-rw-r--r--  net/sunrpc/svc.c                         | 110
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma.c           |  35
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  |  84
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 166
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 195
36 files changed, 1042 insertions(+), 966 deletions(-)
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
index d0ec45ae4e7d..44bd766f2e5d 100644
--- a/Documentation/filesystems/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -5,7 +5,7 @@
 ################################################################################
 
  Author: NetApp and Open Grid Computing
- Date: April 15, 2008
+ Date: May 29, 2008
 
 Table of Contents
 ~~~~~~~~~~~~~~~~~
@@ -60,16 +60,18 @@ Installation
   The procedures described in this document have been tested with
   distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
 
-  - Install nfs-utils-1.1.1 or greater on the client
+  - Install nfs-utils-1.1.2 or greater on the client
 
-    An NFS/RDMA mount point can only be obtained by using the mount.nfs
-    command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs
-    you are using, type:
+    An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+    nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+    version with support for NFS/RDMA mounts, but for various reasons we
+    recommend using nfs-utils-1.1.2 or greater). To see which version of
+    mount.nfs you are using, type:
 
-    > /sbin/mount.nfs -V
+    $ /sbin/mount.nfs -V
 
-    If the version is less than 1.1.1 or the command does not exist,
-    then you will need to install the latest version of nfs-utils.
+    If the version is less than 1.1.2 or the command does not exist,
+    you should install the latest version of nfs-utils.
 
     Download the latest package from:
 
@@ -77,22 +79,33 @@ Installation
 
     Uncompress the package and follow the installation instructions.
 
-    If you will not be using GSS and NFSv4, the installation process
-    can be simplified by disabling these features when running configure:
+    If you will not need the idmapper and gssd executables (you do not need
+    these to create an NFS/RDMA enabled mount command), the installation
+    process can be simplified by disabling these features when running
+    configure:
 
-    > ./configure --disable-gss --disable-nfsv4
+    $ ./configure --disable-gss --disable-nfsv4
 
-    For more information on this see the package's README and INSTALL files.
+    To build nfs-utils you will need the tcp_wrappers package installed. For
+    more information on this see the package's README and INSTALL files.
 
     After building the nfs-utils package, there will be a mount.nfs binary in
     the utils/mount directory. This binary can be used to initiate NFS v2, v3,
-    or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4.
-    The standard technique is to create a symlink called mount.nfs4 to mount.nfs.
+    or v4 mounts. To initiate a v4 mount, the binary must be called
+    mount.nfs4. The standard technique is to create a symlink called
+    mount.nfs4 to mount.nfs.
 
-    NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed
+    This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+    In this location, mount.nfs will be invoked automatically for NFS mounts
+    by the system mount command.
+
+    NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
     on the NFS client machine. You do not need this specific version of
     nfs-utils on the server. Furthermore, only the mount.nfs command from
-    nfs-utils-1.1.1 is needed on the client.
+    nfs-utils-1.1.2 is needed on the client.
 
   - Install a Linux kernel with NFS/RDMA
 
@@ -156,8 +169,8 @@ Check RDMA and NFS Setup
    this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
    card:
 
-   > modprobe ib_mthca
-   > modprobe ib_ipoib
+   $ modprobe ib_mthca
+   $ modprobe ib_ipoib
 
    If you are using InfiniBand, make sure there is a Subnet Manager (SM)
    running on the network. If your IB switch has an embedded SM, you can
@@ -166,7 +179,7 @@ Check RDMA and NFS Setup
 
    If an SM is running on your network, you should see the following:
 
-   > cat /sys/class/infiniband/driverX/ports/1/state
+   $ cat /sys/class/infiniband/driverX/ports/1/state
    4: ACTIVE
 
    where driverX is mthca0, ipath5, ehca3, etc.
@@ -174,10 +187,10 @@ Check RDMA and NFS Setup
    To further test the InfiniBand software stack, use IPoIB (this
    assumes you have two IB hosts named host1 and host2):
 
-   host1> ifconfig ib0 a.b.c.x
-   host2> ifconfig ib0 a.b.c.y
-   host1> ping a.b.c.y
-   host2> ping a.b.c.x
+   host1$ ifconfig ib0 a.b.c.x
+   host2$ ifconfig ib0 a.b.c.y
+   host1$ ping a.b.c.y
+   host2$ ping a.b.c.x
 
    For other device types, follow the appropriate procedures.
 
@@ -202,11 +215,11 @@ NFS/RDMA Setup
    /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
    /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
 
-   The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the
-   cleint's iWARP address(es) for an RNIC.
+   The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+   HCA or the client's iWARP address(es) for an RNIC.
 
-   NOTE: The "insecure" option must be used because the NFS/RDMA client does not
-   use a reserved port.
+   NOTE: The "insecure" option must be used because the NFS/RDMA client does
+   not use a reserved port.
 
    Each time a machine boots:
 
@@ -214,43 +227,45 @@ NFS/RDMA Setup
 
    For InfiniBand using a Mellanox adapter:
 
-   > modprobe ib_mthca
-   > modprobe ib_ipoib
-   > ifconfig ib0 a.b.c.d
+   $ modprobe ib_mthca
+   $ modprobe ib_ipoib
+   $ ifconfig ib0 a.b.c.d
 
    NOTE: use unique addresses for the client and server
 
  - Start the NFS server
 
-   If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
-   load the RDMA transport module:
+   If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+   kernel config), load the RDMA transport module:
 
-   > modprobe svcrdma
+   $ modprobe svcrdma
 
-   Regardless of how the server was built (module or built-in), start the server:
+   Regardless of how the server was built (module or built-in), start the
+   server:
 
-   > /etc/init.d/nfs start
+   $ /etc/init.d/nfs start
 
    or
 
-   > service nfs start
+   $ service nfs start
 
    Instruct the server to listen on the RDMA transport:
 
-   > echo rdma 2050 > /proc/fs/nfsd/portlist
+   $ echo rdma 2050 > /proc/fs/nfsd/portlist
 
  - On the client system
 
-   If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
-   load the RDMA client module:
+   If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+   kernel config), load the RDMA client module:
 
-   > modprobe xprtrdma.ko
+   $ modprobe xprtrdma.ko
 
-   Regardless of how the client was built (module or built-in), issue the mount.nfs command:
+   Regardless of how the client was built (module or built-in), use this
+   command to mount the NFS/RDMA server:
 
-   > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050
+   $ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt
 
-   To verify that the mount is using RDMA, run "cat /proc/mounts" and check the
-   "proto" field for the given mount.
+   To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+   the "proto" field for the given mount.
 
   Congratulations! You're using NFS/RDMA!
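
Editor's note, not part of the patch: the final server-side step above is the write of "rdma 2050" to /proc/fs/nfsd/portlist, which asks nfsd to create an RDMA listener on port 2050. As a minimal sketch, the same request issued from C instead of the shell; the path and the "rdma <port>" request format come from the documentation above, the rest is ordinary POSIX I/O.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *req = "rdma 2050\n";	/* same string the echo writes */
	int fd = open("/proc/fs/nfsd/portlist", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/fs/nfsd/portlist");
		return 1;
	}
	if (write(fd, req, strlen(req)) < 0) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}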
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2169af4d5455..5bd9bf0fa9df 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
 static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int nlmsvc_users;
 static struct task_struct *nlmsvc_task;
-static struct svc_serv *nlmsvc_serv;
+static struct svc_rqst *nlmsvc_rqst;
 int nlmsvc_grace_period;
 unsigned long nlmsvc_timeout;
 
@@ -194,20 +194,11 @@ lockd(void *vrqstp)
 
 		svc_process(rqstp);
 	}
-
 	flush_signals(current);
 	if (nlmsvc_ops)
 		nlmsvc_invalidate_all();
 	nlm_shutdown_hosts();
-
 	unlock_kernel();
-
-	nlmsvc_task = NULL;
-	nlmsvc_serv = NULL;
-
-	/* Exit the RPC thread */
-	svc_exit_thread(rqstp);
-
 	return 0;
 }
 
@@ -254,16 +245,15 @@ int
 lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 {
 	struct svc_serv *serv;
-	struct svc_rqst *rqstp;
 	int error = 0;
 
 	mutex_lock(&nlmsvc_mutex);
 	/*
 	 * Check whether we're already up and running.
 	 */
-	if (nlmsvc_serv) {
+	if (nlmsvc_rqst) {
 		if (proto)
-			error = make_socks(nlmsvc_serv, proto);
+			error = make_socks(nlmsvc_rqst->rq_server, proto);
 		goto out;
 	}
 
@@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 	/*
 	 * Create the kernel thread and wait for it to start.
 	 */
-	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
-	if (IS_ERR(rqstp)) {
-		error = PTR_ERR(rqstp);
+	nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
+	if (IS_ERR(nlmsvc_rqst)) {
+		error = PTR_ERR(nlmsvc_rqst);
+		nlmsvc_rqst = NULL;
 		printk(KERN_WARNING
 			"lockd_up: svc_rqst allocation failed, error=%d\n",
 			error);
@@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 	}
 
 	svc_sock_update_bufs(serv);
-	nlmsvc_serv = rqstp->rq_server;
 
-	nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
+	nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
 	if (IS_ERR(nlmsvc_task)) {
 		error = PTR_ERR(nlmsvc_task);
+		svc_exit_thread(nlmsvc_rqst);
 		nlmsvc_task = NULL;
-		nlmsvc_serv = NULL;
+		nlmsvc_rqst = NULL;
 		printk(KERN_WARNING
 			"lockd_up: kthread_run failed, error=%d\n", error);
-		svc_exit_thread(rqstp);
 		goto destroy_and_out;
 	}
 
@@ -346,6 +336,9 @@ lockd_down(void)
 		BUG();
 	}
 	kthread_stop(nlmsvc_task);
+	svc_exit_thread(nlmsvc_rqst);
+	nlmsvc_task = NULL;
+	nlmsvc_rqst = NULL;
 out:
 	mutex_unlock(&nlmsvc_mutex);
 }
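
Editor's note, not part of the patch: the net effect of the svc.c hunks is to give the lockd thread a single owner. lockd_up() creates the svc_rqst and the kthread, and lockd_down(), not the worker itself, tears both down after kthread_stop() returns. A stripped-down sketch of that ownership pattern with illustrative names and none of the sunrpc specifics:

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/sched.h>

static DEFINE_MUTEX(worker_mutex);
static struct task_struct *worker_task;	/* protected by worker_mutex */

static int worker(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;	/* cleanup happens in worker_down(), not here */
}

static int worker_up(void)
{
	int error = 0;

	mutex_lock(&worker_mutex);
	if (worker_task)
		goto out;	/* already running */
	worker_task = kthread_run(worker, NULL, "worker");
	if (IS_ERR(worker_task)) {
		error = PTR_ERR(worker_task);
		worker_task = NULL;	/* reset state on failure, as lockd_up() now does */
	}
out:
	mutex_unlock(&worker_mutex);
	return error;
}

static void worker_down(void)
{
	mutex_lock(&worker_mutex);
	if (worker_task) {
		kthread_stop(worker_task);	/* waits for worker() to return */
		worker_task = NULL;
	}
	mutex_unlock(&worker_mutex);
}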
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 2e27176ff42f..399444639337 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -58,8 +58,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 
 no_locks:
-	if (host)
-		nlm_release_host(host);
+	nlm_release_host(host);
 	if (error)
 		return error;
 	return nlm_lck_denied_nolocks;
@@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
-	resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
+	resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
 	if (resp->status == nlm_drop_reply)
 		rc = rpc_drop_reply;
 	else
@@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 #endif
 
 	/* Now try to lock the file */
-	resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
+	resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
 				   argp->block, &argp->cookie);
 	if (resp->status == nlm_drop_reply)
 		rc = rpc_drop_reply;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 56a08ab9a4cb..821b9acdfb66 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
 
 static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
 {
-	if(a->len != b->len)
+	if (a->len != b->len)
 		return 0;
-	if(memcmp(a->data,b->data,a->len))
+	if (memcmp(a->data, b->data, a->len))
 		return 0;
 	return 1;
 }
@@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 	struct nlm_block *block;
 	struct nlm_rqst *call = NULL;
 
+	nlm_get_host(host);
 	call = nlm_alloc_call(host);
 	if (call == NULL)
 		return NULL;
@@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
  */
 __be32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
-	    struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
+	    struct nlm_host *host, struct nlm_lock *lock, int wait,
+	    struct nlm_cookie *cookie)
 {
 	struct nlm_block *block = NULL;
-	struct nlm_host *host;
 	int error;
 	__be32 ret;
 
@@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 				(long long)lock->fl.fl_end,
 				wait);
 
-	/* Create host handle for callback */
-	host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
-	if (host == NULL)
-		return nlm_lck_denied_nolocks;
-
 	/* Lock file against concurrent access */
 	mutex_lock(&file->f_mutex);
 	/* Get existing block (in case client is busy-waiting)
@@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	 */
 	block = nlmsvc_lookup_block(file, lock);
 	if (block == NULL) {
-		block = nlmsvc_create_block(rqstp, nlm_get_host(host), file,
-					lock, cookie);
+		block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
 		ret = nlm_lck_denied_nolocks;
 		if (block == NULL)
 			goto out;
@@ -417,7 +412,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	lock->fl.fl_flags &= ~FL_SLEEP;
 
 	dprintk("lockd: vfs_lock_file returned %d\n", error);
-	switch(error) {
+	switch (error) {
 		case 0:
 			ret = nlm_granted;
 			goto out;
@@ -450,7 +445,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 out:
 	mutex_unlock(&file->f_mutex);
 	nlmsvc_release_block(block);
-	nlm_release_host(host);
 	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
 	return ret;
 }
@@ -460,8 +454,8 @@ out:
  */
 __be32
 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
-		struct nlm_lock *lock, struct nlm_lock *conflock,
-		struct nlm_cookie *cookie)
+		struct nlm_host *host, struct nlm_lock *lock,
+		struct nlm_lock *conflock, struct nlm_cookie *cookie)
 {
 	struct nlm_block *block = NULL;
 	int error;
@@ -479,16 +473,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 
 	if (block == NULL) {
 		struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-		struct nlm_host *host;
 
 		if (conf == NULL)
 			return nlm_granted;
-		/* Create host handle for callback */
-		host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
-		if (host == NULL) {
-			kfree(conf);
-			return nlm_lck_denied_nolocks;
-		}
 		block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
 		if (block == NULL) {
 			kfree(conf);
@@ -897,7 +884,7 @@ nlmsvc_retry_blocked(void)
 
 		if (block->b_when == NLM_NEVER)
 			break;
-		if (time_after(block->b_when,jiffies)) {
+		if (time_after(block->b_when, jiffies)) {
 			timeout = block->b_when - jiffies;
 			break;
 		}
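
Editor's note, not part of the patch: the svclock.c hunks change who owns the host reference. nlmsvc_create_block() now takes its own reference with nlm_get_host(), so callers such as nlmsvc_lock() and nlmsvc_testlock() neither look the host up nor release it themselves. A toy userspace sketch of that convention (all names here are hypothetical, not kernel code):

#include <stdio.h>

struct host {
	int refcount;
};

static struct host *host_get(struct host *h)
{
	h->refcount++;
	return h;
}

static void host_put(struct host *h)
{
	if (--h->refcount == 0)
		printf("host freed\n");
}

struct block {
	struct host *b_host;
};

/* takes its own reference on 'h'; the caller keeps and later drops its own */
static void block_init(struct block *b, struct host *h)
{
	b->b_host = host_get(h);
}

static void block_release(struct block *b)
{
	host_put(b->b_host);
}

int main(void)
{
	struct host h = { .refcount = 1 };	/* caller's lookup reference */
	struct block b;

	block_init(&b, &h);	/* refcount == 2 */
	host_put(&h);		/* caller done; the block still holds one */
	block_release(&b);	/* refcount == 0 -> "host freed" */
	return 0;
}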
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ce6952b50a75..76019d2ff72d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -87,8 +87,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 
 no_locks:
-	if (host)
-		nlm_release_host(host);
+	nlm_release_host(host);
 	if (error)
 		return error;
 	return nlm_lck_denied_nolocks;
@@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
-	resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
+	resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
 	if (resp->status == nlm_drop_reply)
 		rc = rpc_drop_reply;
 	else
@@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 #endif
 
 	/* Now try to lock the file */
-	resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
+	resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
 					       argp->block, &argp->cookie));
 	if (resp->status == nlm_drop_reply)
 		rc = rpc_drop_reply;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d1c48b539df8..198b4e55b373 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host)
 	}
 }
 
-/*
- * Remove all locks held for clients
+/**
+ * nlmsvc_invalidate_all - remove all locks held for clients
+ *
+ * Release all locks held by NFS clients.
+ *
  */
 void
 nlmsvc_invalidate_all(void)
 {
-	/* Release all locks held by NFS clients.
+	/*
 	 * Previously, the code would call
 	 * nlmsvc_free_host_resources for each client in
 	 * turn, which is about as inefficient as it gets.
@@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
 	return sb == file->f_file->f_path.mnt->mnt_sb;
 }
 
+/**
+ * nlmsvc_unlock_all_by_sb - release locks held on this file system
+ * @sb: super block
+ *
+ * Release all locks held by clients accessing this file system.
+ */
 int
 nlmsvc_unlock_all_by_sb(struct super_block *sb)
 {
@@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
 static int
 nlmsvc_match_ip(void *datap, struct nlm_host *host)
 {
-	__be32 *server_addr = datap;
-
-	return host->h_saddr.sin_addr.s_addr == *server_addr;
+	return nlm_cmp_addr(&host->h_saddr, datap);
 }
 
+/**
+ * nlmsvc_unlock_all_by_ip - release local locks by IP address
+ * @server_addr: server's IP address as seen by clients
+ *
+ * Release all locks held by clients accessing this host
+ * via the passed in IP address.
+ */
 int
-nlmsvc_unlock_all_by_ip(__be32 server_addr)
+nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
 {
 	int ret;
-	ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
-	return ret ? -EIO : 0;
 
+	ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL);
+	return ret ? -EIO : 0;
 }
 EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
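
Editor's note, not part of the patch: with nlmsvc_unlock_all_by_ip() now taking a struct sockaddr * rather than a raw __be32, a caller fills in a sockaddr and the address matching is delegated to nlm_cmp_addr(). A minimal sketch of such a caller; the helper name and the idea of wrapping a raw IPv4 address are illustrative, only the nlmsvc_unlock_all_by_ip() signature comes from this series.

#include <linux/in.h>
#include <linux/lockd/lockd.h>

static int drop_locks_for_server(__be32 ipv4_addr)
{
	/* build a generic sockaddr around the IPv4 address */
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = ipv4_addr,
	};

	return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
}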
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 9e4a568a5013..6b6225ac4926 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 	fh.fh_export = NULL;
 
 	exp_readlock();
-	nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp);
+	nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
 	fh_put(&fh);
 	rqstp->rq_client = NULL;
 	exp_readunlock();
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1c3b7654e966..4e3219e84116 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 	dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
 
 	fh = fh_copy(&resp->fh, &argp->fh);
-	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+	if (nfserr)
 		RETURN_STATUS(nfserr);
 
 	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 	dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
 
 	fh = fh_copy(&resp->fh, &argp->fh);
-	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
 
 	if (!nfserr) {
 		nfserr = nfserrno( nfsd_set_posix_acl(
@@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
 	fh_copy(&resp->fh, &argp->fh);
-	return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
 }
 
 /*
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index b647f2f872dc..9981dbb377a3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 	__be32 nfserr = 0;
 
 	fh = fh_copy(&resp->fh, &argp->fh);
-	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+	if (nfserr)
 		RETURN_STATUS(nfserr);
 
 	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 	__be32 nfserr = 0;
 
 	fh = fh_copy(&resp->fh, &argp->fh);
-	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
 
 	if (!nfserr) {
 		nfserr = nfserrno( nfsd_set_posix_acl(
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index c721a1e6e9dd..4d617ea28cfc 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
 		SVCFH_fmt(&argp->fh));
 
 	fh_copy(&resp->fh, &argp->fh);
-	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
 	if (nfserr)
 		RETURN_STATUS(nfserr);
 
@@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 	attr = &argp->attrs;
 
 	/* Get the directory inode */
-	nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
+	nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
 	if (nfserr)
 		RETURN_STATUS(nfserr);
 
@@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
 	resp->f_maxfilesize = ~(u32) 0;
 	resp->f_properties = NFS3_FSF_DEFAULT;
 
-	nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+	nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
 
 	/* Check special features of the file system. May request
 	 * different read/write sizes for file systems known to have
@@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
 	resp->p_case_insensitive = 0;
 	resp->p_case_preserving = 1;
 
-	nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+	nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
 
 	if (nfserr == 0) {
 		struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c309c881bd4e..eef1629806f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 		return nfserr_inval;
 
 	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
-		accmode |= MAY_READ;
+		accmode |= NFSD_MAY_READ;
 	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-		accmode |= (MAY_WRITE | MAY_TRUNC);
+		accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
 	if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
-		accmode |= MAY_WRITE;
+		accmode |= NFSD_MAY_WRITE;
 
 	status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
 
@@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 		&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
 
 	if (!created)
-		status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
+		status = do_open_permission(rqstp, current_fh, open,
+					    NFSD_MAY_NOP);
 
 out:
 	fh_put(&resfh);
@@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 	open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
 		(open->op_iattr.ia_size == 0);
 
-	status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE);
+	status = do_open_permission(rqstp, current_fh, open,
+				    NFSD_MAY_OWNER_OVERRIDE);
 
 	return status;
 }
@@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
 		memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
 				rp->rp_openfh_len);
-		status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+		status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
 		if (status)
 			dprintk("nfsd4_open: replay failed"
 				" restoring previous filehandle\n");
@@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
 	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
 	       putfh->pf_fhlen);
-	return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+	return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
 }
 
 static __be32
@@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	fh_init(&resfh, NFS4_FHSIZE);
 
-	status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE);
+	status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
+			   NFSD_MAY_CREATE);
 	if (status == nfserr_symlink)
 		status = nfserr_notdir;
 	if (status)
@@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	__be32 status;
 
-	status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+	status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
 	if (status)
 		return status;
 
@@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	int count;
 	__be32 status;
 
-	status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+	status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
 	if (status)
 		return status;
 
@@ -843,10 +846,13 @@ struct nfsd4_operation {
 #define ALLOWED_WITHOUT_FH 1
 /* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
 #define ALLOWED_ON_ABSENT_FS 2
+	char *op_name;
 };
 
 static struct nfsd4_operation nfsd4_ops[];
 
+static inline char *nfsd4_op_name(unsigned opnum);
+
 /*
  * COMPOUND call.
  */
@@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	while (!status && resp->opcnt < args->opcnt) {
 		op = &args->ops[resp->opcnt++];
 
-		dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
+		dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
+			resp->opcnt, args->opcnt, op->opnum,
+			nfsd4_op_name(op->opnum));
 
 		/*
 		 * The XDR decode routines may have pre-set op->status;
@@ -952,126 +960,170 @@ encode_op:
 out:
 	nfsd4_release_compoundargs(args);
 	cstate_free(cstate);
+	dprintk("nfsv4 compound returned %d\n", ntohl(status));
 	return status;
 }
 
 static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	[OP_ACCESS] = {
 		.op_func = (nfsd4op_func)nfsd4_access,
+		.op_name = "OP_ACCESS",
 	},
 	[OP_CLOSE] = {
 		.op_func = (nfsd4op_func)nfsd4_close,
+		.op_name = "OP_CLOSE",
 	},
 	[OP_COMMIT] = {
 		.op_func = (nfsd4op_func)nfsd4_commit,
+		.op_name = "OP_COMMIT",
 	},
 	[OP_CREATE] = {
 		.op_func = (nfsd4op_func)nfsd4_create,
+		.op_name = "OP_CREATE",
 	},
 	[OP_DELEGRETURN] = {
 		.op_func = (nfsd4op_func)nfsd4_delegreturn,
+		.op_name = "OP_DELEGRETURN",
 	},
 	[OP_GETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_getattr,
 		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_GETATTR",
 	},
 	[OP_GETFH] = {
 		.op_func = (nfsd4op_func)nfsd4_getfh,
+		.op_name = "OP_GETFH",
 	},
 	[OP_LINK] = {
 		.op_func = (nfsd4op_func)nfsd4_link,
+		.op_name = "OP_LINK",
 	},
 	[OP_LOCK] = {
 		.op_func = (nfsd4op_func)nfsd4_lock,
+		.op_name = "OP_LOCK",
 	},
 	[OP_LOCKT] = {
 		.op_func = (nfsd4op_func)nfsd4_lockt,
+		.op_name = "OP_LOCKT",
 	},
 	[OP_LOCKU] = {
 		.op_func = (nfsd4op_func)nfsd4_locku,
+		.op_name = "OP_LOCKU",
 	},
 	[OP_LOOKUP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookup,
+		.op_name = "OP_LOOKUP",
 	},
 	[OP_LOOKUPP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookupp,
+		.op_name = "OP_LOOKUPP",
 	},
 	[OP_NVERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_nverify,
+		.op_name = "OP_NVERIFY",
 	},
 	[OP_OPEN] = {
 		.op_func = (nfsd4op_func)nfsd4_open,
+		.op_name = "OP_OPEN",
 	},
 	[OP_OPEN_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_open_confirm,
+		.op_name = "OP_OPEN_CONFIRM",
 	},
 	[OP_OPEN_DOWNGRADE] = {
 		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+		.op_name = "OP_OPEN_DOWNGRADE",
 	},
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_PUTFH",
 	},
 	[OP_PUTPUBFH] = {
-		/* unsupported; just for future reference: */
+		/* unsupported, just for future reference: */
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_PUTPUBFH",
 	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_PUTROOTFH",
 	},
 	[OP_READ] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
+		.op_name = "OP_READ",
 	},
 	[OP_READDIR] = {
 		.op_func = (nfsd4op_func)nfsd4_readdir,
+		.op_name = "OP_READDIR",
 	},
 	[OP_READLINK] = {
 		.op_func = (nfsd4op_func)nfsd4_readlink,
+		.op_name = "OP_READLINK",
 	},
 	[OP_REMOVE] = {
 		.op_func = (nfsd4op_func)nfsd4_remove,
+		.op_name = "OP_REMOVE",
 	},
 	[OP_RENAME] = {
+		.op_name = "OP_RENAME",
 		.op_func = (nfsd4op_func)nfsd4_rename,
 	},
 	[OP_RENEW] = {
 		.op_func = (nfsd4op_func)nfsd4_renew,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_RENEW",
 	},
 	[OP_RESTOREFH] = {
 		.op_func = (nfsd4op_func)nfsd4_restorefh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_RESTOREFH",
 	},
 	[OP_SAVEFH] = {
 		.op_func = (nfsd4op_func)nfsd4_savefh,
+		.op_name = "OP_SAVEFH",
 	},
 	[OP_SECINFO] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo,
+		.op_name = "OP_SECINFO",
 	},
 	[OP_SETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_setattr,
+		.op_name = "OP_SETATTR",
 	},
 	[OP_SETCLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_SETCLIENTID",
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_SETCLIENTID_CONFIRM",
 	},
 	[OP_VERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_verify,
+		.op_name = "OP_VERIFY",
 	},
 	[OP_WRITE] = {
 		.op_func = (nfsd4op_func)nfsd4_write,
+		.op_name = "OP_WRITE",
 	},
 	[OP_RELEASE_LOCKOWNER] = {
 		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_name = "OP_RELEASE_LOCKOWNER",
 	},
 };
 
+static inline char *
+nfsd4_op_name(unsigned opnum)
+{
+	if (opnum < ARRAY_SIZE(nfsd4_ops))
+		return nfsd4_ops[opnum].op_name;
+	return "unknown_operation";
+}
+
 #define nfs4svc_decode_voidargs		NULL
 #define nfs4svc_release_void		NULL
 #define nfsd4_voidres			nfsd4_voidargs
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8799b8708188..1578d7a2667e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x)
 	return x <= NFS4_SHARE_DENY_BOTH;
 }
 
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used.  This allows us
+ * to enforce the recommendation of rfc 3530 14.2.19 that the server
+ * return an error if the client attempts to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ *	OPEN allow read, deny write
+ *	OPEN allow both, deny none
+ *	DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
 static void
 set_access(unsigned int *access, unsigned long bmap) {
 	int i;
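
Editor's note, not part of the patch: the comment added above describes the share-bit bookkeeping in prose. A self-contained demo of the same idea, using the NFSv4 share-access encoding READ=1, WRITE=2, BOTH=3: one bitmap bit is kept per share value ever used by an OPEN on the stateid, and a downgrade is accepted only if its requested value is one of the recorded bits. The demo itself is illustrative, not kernel code.

#include <stdio.h>

#define SHARE_ACCESS_READ  1
#define SHARE_ACCESS_WRITE 2
#define SHARE_ACCESS_BOTH  3

int main(void)
{
	unsigned long access_bmap = 0;

	/* two opens on the same stateid */
	access_bmap |= 1UL << SHARE_ACCESS_READ;
	access_bmap |= 1UL << SHARE_ACCESS_BOTH;

	/* a downgrade to WRITE alone was never opened: reject it */
	int requested = SHARE_ACCESS_WRITE;
	if (access_bmap & (1UL << requested))
		printf("downgrade to %d allowed\n", requested);
	else
		printf("downgrade to %d rejected\n", requested);
	return 0;
}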
@@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
 		int err = get_write_access(inode);
 		if (err)
 			return nfserrno(err);
+		err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
+		if (err)
+			return nfserrno(err);
+		file_take_write(filp);
 	}
 	status = nfsd4_truncate(rqstp, cur_fh, open);
 	if (status) {
@@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
 	}
 	/* remember the open */
 	filp->f_mode |= open->op_share_access;
-	set_bit(open->op_share_access, &stp->st_access_bmap);
-	set_bit(open->op_share_deny, &stp->st_deny_bmap);
+	__set_bit(open->op_share_access, &stp->st_access_bmap);
+	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
 
 	return nfs_ok;
 }
@@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 		/* Stateid was not found, this is a new OPEN */
 		int flags = 0;
 		if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
-			flags |= MAY_READ;
+			flags |= NFSD_MAY_READ;
 		if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-			flags |= MAY_WRITE;
+			flags |= NFSD_MAY_WRITE;
 		status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
 		if (status)
 			goto out;
@@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_inval;
 
 	if ((status = fh_verify(rqstp, &cstate->current_fh,
-				S_IFREG, MAY_LOCK))) {
+				S_IFREG, NFSD_MAY_LOCK))) {
 		dprintk("NFSD: nfsd4_lock: permission denied!\n");
 		return status;
 	}
@@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void)
 	nfs4_unlock_state();
 }
 
+/*
+ * user_recovery_dirname is protected by the nfsd_mutex since it's only
+ * accessed when nfsd is starting.
+ */
 static void
 nfs4_set_recdir(char *recdir)
 {
-	nfs4_lock_state();
 	strcpy(user_recovery_dirname, recdir);
-	nfs4_unlock_state();
 }
 
 /*
@@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir)
 	return status;
 }
 
+char *
+nfs4_recoverydir(void)
+{
+	return user_recovery_dirname;
+}
+
 /*
  * Called when leasetime is changed.
  *
@@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir)
  * we start to register any changes in lease time. If the administrator
  * really wants to change the lease time *now*, they can go ahead and bring
  * nfsd down and then back up again after changing the lease time.
+ *
+ * user_lease_time is protected by nfsd_mutex since it's only really accessed
+ * when nfsd is starting.
  */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-	lock_kernel();
 	user_lease_time = leasetime;
-	unlock_kernel();
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c513bbdf2d36..14ba4d9b2859 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -986,10 +986,74 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
 }
 
 static __be32
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+{
+	return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
+{
+	return nfserr_opnotsupp;
+}
+
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
+
+static nfsd4_dec nfsd4_dec_ops[] = {
+	[OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+	[OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+	[OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+	[OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+	[OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+	[OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+	[OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+	[OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+	[OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+	[OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+	[OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+	[OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+	[OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+	[OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+	[OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+	[OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
+	[OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+	[OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+	[OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+	[OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+	[OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+	[OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+	[OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+	[OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+	[OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
+	[OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+	[OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+	[OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+	[OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+	[OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
+	[OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+	[OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+	[OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+	[OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
+};
+
+struct nfsd4_minorversion_ops {
+	nfsd4_dec *decoders;
+	int nops;
+};
+
+static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
+	[0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
+};
+
+static __be32
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
 	DECODE_HEAD;
 	struct nfsd4_op *op;
+	struct nfsd4_minorversion_ops *ops;
 	int i;
 
 	/*
@@ -1019,6 +1083,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 		}
 	}
 
+	if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
+		argp->opcnt = 0;
+
+	ops = &nfsd4_minorversion[argp->minorversion];
 	for (i = 0; i < argp->opcnt; i++) {
 		op = &argp->ops[i];
 		op->replay = NULL;
@@ -1056,120 +1124,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1056 } 1124 }
1057 op->opnum = ntohl(*argp->p++); 1125 op->opnum = ntohl(*argp->p++);
1058 1126
1059 switch (op->opnum) { 1127 if (op->opnum >= OP_ACCESS && op->opnum < ops->nops)
1060 case 2: /* Reserved operation */ 1128 op->status = ops->decoders[op->opnum](argp, &op->u);
1061 op->opnum = OP_ILLEGAL; 1129 else {
1062 if (argp->minorversion == 0)
1063 op->status = nfserr_op_illegal;
1064 else
1065 op->status = nfserr_minor_vers_mismatch;
1066 break;
1067 case OP_ACCESS:
1068 op->status = nfsd4_decode_access(argp, &op->u.access);
1069 break;
1070 case OP_CLOSE:
1071 op->status = nfsd4_decode_close(argp, &op->u.close);
1072 break;
1073 case OP_COMMIT:
1074 op->status = nfsd4_decode_commit(argp, &op->u.commit);
1075 break;
1076 case OP_CREATE:
1077 op->status = nfsd4_decode_create(argp, &op->u.create);
1078 break;
1079 case OP_DELEGRETURN:
1080 op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
1081 break;
1082 case OP_GETATTR:
1083 op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
1084 break;
1085 case OP_GETFH:
1086 op->status = nfs_ok;
1087 break;
1088 case OP_LINK:
1089 op->status = nfsd4_decode_link(argp, &op->u.link);
1090 break;
1091 case OP_LOCK:
1092 op->status = nfsd4_decode_lock(argp, &op->u.lock);
1093 break;
1094 case OP_LOCKT:
1095 op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
1096 break;
1097 case OP_LOCKU:
1098 op->status = nfsd4_decode_locku(argp, &op->u.locku);
1099 break;
1100 case OP_LOOKUP:
1101 op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
1102 break;
1103 case OP_LOOKUPP:
1104 op->status = nfs_ok;
1105 break;
1106 case OP_NVERIFY:
1107 op->status = nfsd4_decode_verify(argp, &op->u.nverify);
1108 break;
1109 case OP_OPEN:
1110 op->status = nfsd4_decode_open(argp, &op->u.open);
1111 break;
1112 case OP_OPEN_CONFIRM:
1113 op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
1114 break;
1115 case OP_OPEN_DOWNGRADE:
1116 op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
1117 break;
1118 case OP_PUTFH:
1119 op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
1120 break;
1121 case OP_PUTROOTFH:
1122 op->status = nfs_ok;
1123 break;
1124 case OP_READ:
1125 op->status = nfsd4_decode_read(argp, &op->u.read);
1126 break;
1127 case OP_READDIR:
1128 op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
1129 break;
1130 case OP_READLINK:
1131 op->status = nfs_ok;
1132 break;
1133 case OP_REMOVE:
1134 op->status = nfsd4_decode_remove(argp, &op->u.remove);
1135 break;
1136 case OP_RENAME:
1137 op->status = nfsd4_decode_rename(argp, &op->u.rename);
1138 break;
1139 case OP_RESTOREFH:
1140 op->status = nfs_ok;
1141 break;
1142 case OP_RENEW:
1143 op->status = nfsd4_decode_renew(argp, &op->u.renew);
1144 break;
1145 case OP_SAVEFH:
1146 op->status = nfs_ok;
1147 break;
1148 case OP_SECINFO:
1149 op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo);
1150 break;
1151 case OP_SETATTR:
1152 op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
1153 break;
1154 case OP_SETCLIENTID:
1155 op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
1156 break;
1157 case OP_SETCLIENTID_CONFIRM:
1158 op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
1159 break;
1160 case OP_VERIFY:
1161 op->status = nfsd4_decode_verify(argp, &op->u.verify);
1162 break;
1163 case OP_WRITE:
1164 op->status = nfsd4_decode_write(argp, &op->u.write);
1165 break;
1166 case OP_RELEASE_LOCKOWNER:
1167 op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
1168 break;
1169 default:
1170 op->opnum = OP_ILLEGAL; 1130 op->opnum = OP_ILLEGAL;
1171 op->status = nfserr_op_illegal; 1131 op->status = nfserr_op_illegal;
1172 break;
1173 } 1132 }
1174 1133
1175 if (op->status) { 1134 if (op->status) {
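The hunk above replaces the long per-opcode switch in nfsd4_decode_compound with table dispatch: nfsd4_dec_ops[] is indexed directly by op->opnum, the minorversion is bounds-checked against nfsd4_minorversion[], and anything outside the range [OP_ACCESS, ops->nops) falls through to OP_ILLEGAL. Below is a minimal standalone sketch of the same pattern; the opcode names, stub argument type, and numeric values are illustrative, not the kernel's.

#include <stdio.h>

/* Illustrative opcodes; the real ones are protocol constants. */
enum { OP_FIRST = 3, OP_GETATTR = 9, OP_PUTFH = 22, OP_MAX = 40 };

struct args { int dummy; };            /* stand-in for nfsd4_compoundargs */
typedef int (*dec_fn)(struct args *);  /* stand-in for the nfsd4_dec type */

static int decode_noop(struct args *a)    { return 0; }
static int decode_getattr(struct args *a) { return 0; }

/* Designated initializers; slots not listed default to NULL.  (The kernel
 * table instead fills every slot, using nfsd4_decode_noop/_notsupp.) */
static dec_fn dec_ops[OP_MAX] = {
	[OP_GETATTR] = decode_getattr,
	[OP_PUTFH]   = decode_noop,
};

static int dispatch(struct args *a, int opnum)
{
	/* Out-of-range or unregistered opcodes become "illegal". */
	if (opnum < OP_FIRST || opnum >= OP_MAX || !dec_ops[opnum])
		return -1;      /* caller maps this to OP_ILLEGAL */
	return dec_ops[opnum](a);
}

int main(void)
{
	struct args a = { 0 };
	printf("%d %d\n", dispatch(&a, OP_GETATTR), dispatch(&a, 99));
	return 0;
}

The per-minorversion struct adds nothing for v4.0 alone; presumably it leaves room for a future minorversion to supply its own decoder table.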
@@ -1201,11 +1160,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1201 *p++ = htonl((u32)((n) >> 32)); \ 1160 *p++ = htonl((u32)((n) >> 32)); \
1202 *p++ = htonl((u32)(n)); \ 1161 *p++ = htonl((u32)(n)); \
1203} while (0) 1162} while (0)
1204#define WRITEMEM(ptr,nbytes) do { \ 1163#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \
1205 *(p + XDR_QUADLEN(nbytes) -1) = 0; \ 1164 *(p + XDR_QUADLEN(nbytes) -1) = 0; \
1206 memcpy(p, ptr, nbytes); \ 1165 memcpy(p, ptr, nbytes); \
1207 p += XDR_QUADLEN(nbytes); \ 1166 p += XDR_QUADLEN(nbytes); \
1208} while (0) 1167}} while (0)
1209#define WRITECINFO(c) do { \ 1168#define WRITECINFO(c) do { \
1210 *p++ = htonl(c.atomic); \ 1169 *p++ = htonl(c.atomic); \
1211 *p++ = htonl(c.before_ctime_sec); \ 1170 *p++ = htonl(c.before_ctime_sec); \
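The WRITEMEM change guards the zero-length case: XDR_QUADLEN(0) is 0, so the pre-zeroing store *(p + XDR_QUADLEN(nbytes) - 1) = 0 would otherwise write one word before the cursor. A userspace sketch of the quad-align-and-pad logic, with a QUADLEN macro standing in for the kernel's XDR_QUADLEN:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Round a byte count up to a count of 32-bit words (cf. XDR_QUADLEN). */
#define QUADLEN(n) (((n) + 3) >> 2)

/* Copy nbytes into the XDR stream at *pp, zero-padding to a word boundary. */
static void writemem(uint32_t **pp, const void *ptr, size_t nbytes)
{
	uint32_t *p = *pp;

	if (nbytes > 0) {                   /* QUADLEN(0)-1 would index p[-1] */
		p[QUADLEN(nbytes) - 1] = 0; /* pre-zero the last word: padding */
		memcpy(p, ptr, nbytes);
		p += QUADLEN(nbytes);
	}
	*pp = p;
}

int main(void)
{
	uint32_t buf[4] = { 0 };
	uint32_t *p = buf;

	writemem(&p, "abcde", 5);   /* 2 words; the last 3 bytes stay zero */
	printf("words used: %ld\n", (long)(p - buf));
	return 0;
}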
@@ -1991,7 +1950,7 @@ fail:
1991 return -EINVAL; 1950 return -EINVAL;
1992} 1951}
1993 1952
1994static void 1953static __be32
1995nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) 1954nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
1996{ 1955{
1997 ENCODE_HEAD; 1956 ENCODE_HEAD;
@@ -2002,9 +1961,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2002 WRITE32(access->ac_resp_access); 1961 WRITE32(access->ac_resp_access);
2003 ADJUST_ARGS(); 1962 ADJUST_ARGS();
2004 } 1963 }
1964 return nfserr;
2005} 1965}
2006 1966
2007static void 1967static __be32
2008nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) 1968nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
2009{ 1969{
2010 ENCODE_SEQID_OP_HEAD; 1970 ENCODE_SEQID_OP_HEAD;
@@ -2016,10 +1976,11 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
2016 ADJUST_ARGS(); 1976 ADJUST_ARGS();
2017 } 1977 }
2018 ENCODE_SEQID_OP_TAIL(close->cl_stateowner); 1978 ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
1979 return nfserr;
2019} 1980}
2020 1981
2021 1982
2022static void 1983static __be32
2023nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) 1984nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
2024{ 1985{
2025 ENCODE_HEAD; 1986 ENCODE_HEAD;
@@ -2029,9 +1990,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2029 WRITEMEM(commit->co_verf.data, 8); 1990 WRITEMEM(commit->co_verf.data, 8);
2030 ADJUST_ARGS(); 1991 ADJUST_ARGS();
2031 } 1992 }
1993 return nfserr;
2032} 1994}
2033 1995
2034static void 1996static __be32
2035nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) 1997nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
2036{ 1998{
2037 ENCODE_HEAD; 1999 ENCODE_HEAD;
@@ -2044,6 +2006,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2044 WRITE32(create->cr_bmval[1]); 2006 WRITE32(create->cr_bmval[1]);
2045 ADJUST_ARGS(); 2007 ADJUST_ARGS();
2046 } 2008 }
2009 return nfserr;
2047} 2010}
2048 2011
2049static __be32 2012static __be32
@@ -2064,9 +2027,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2064 return nfserr; 2027 return nfserr;
2065} 2028}
2066 2029
2067static void 2030static __be32
2068nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp) 2031nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
2069{ 2032{
2033 struct svc_fh *fhp = *fhpp;
2070 unsigned int len; 2034 unsigned int len;
2071 ENCODE_HEAD; 2035 ENCODE_HEAD;
2072 2036
@@ -2077,6 +2041,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
2077 WRITEMEM(&fhp->fh_handle.fh_base, len); 2041 WRITEMEM(&fhp->fh_handle.fh_base, len);
2078 ADJUST_ARGS(); 2042 ADJUST_ARGS();
2079 } 2043 }
2044 return nfserr;
2080} 2045}
2081 2046
2082/* 2047/*
@@ -2104,7 +2069,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
2104 ADJUST_ARGS(); 2069 ADJUST_ARGS();
2105} 2070}
2106 2071
2107static void 2072static __be32
2108nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) 2073nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
2109{ 2074{
2110 ENCODE_SEQID_OP_HEAD; 2075 ENCODE_SEQID_OP_HEAD;
@@ -2118,16 +2083,18 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
2118 nfsd4_encode_lock_denied(resp, &lock->lk_denied); 2083 nfsd4_encode_lock_denied(resp, &lock->lk_denied);
2119 2084
2120 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); 2085 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
2086 return nfserr;
2121} 2087}
2122 2088
2123static void 2089static __be32
2124nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) 2090nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
2125{ 2091{
2126 if (nfserr == nfserr_denied) 2092 if (nfserr == nfserr_denied)
2127 nfsd4_encode_lock_denied(resp, &lockt->lt_denied); 2093 nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
2094 return nfserr;
2128} 2095}
2129 2096
2130static void 2097static __be32
2131nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) 2098nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
2132{ 2099{
2133 ENCODE_SEQID_OP_HEAD; 2100 ENCODE_SEQID_OP_HEAD;
@@ -2140,10 +2107,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
2140 } 2107 }
2141 2108
2142 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); 2109 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
2110 return nfserr;
2143} 2111}
2144 2112
2145 2113
2146static void 2114static __be32
2147nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) 2115nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
2148{ 2116{
2149 ENCODE_HEAD; 2117 ENCODE_HEAD;
@@ -2153,10 +2121,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
2153 WRITECINFO(link->li_cinfo); 2121 WRITECINFO(link->li_cinfo);
2154 ADJUST_ARGS(); 2122 ADJUST_ARGS();
2155 } 2123 }
2124 return nfserr;
2156} 2125}
2157 2126
2158 2127
2159static void 2128static __be32
2160nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) 2129nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
2161{ 2130{
2162 ENCODE_SEQID_OP_HEAD; 2131 ENCODE_SEQID_OP_HEAD;
@@ -2219,9 +2188,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
2219 /* XXX save filehandle here */ 2188 /* XXX save filehandle here */
2220out: 2189out:
2221 ENCODE_SEQID_OP_TAIL(open->op_stateowner); 2190 ENCODE_SEQID_OP_TAIL(open->op_stateowner);
2191 return nfserr;
2222} 2192}
2223 2193
2224static void 2194static __be32
2225nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) 2195nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
2226{ 2196{
2227 ENCODE_SEQID_OP_HEAD; 2197 ENCODE_SEQID_OP_HEAD;
@@ -2234,9 +2204,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
2234 } 2204 }
2235 2205
2236 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); 2206 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
2207 return nfserr;
2237} 2208}
2238 2209
2239static void 2210static __be32
2240nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) 2211nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
2241{ 2212{
2242 ENCODE_SEQID_OP_HEAD; 2213 ENCODE_SEQID_OP_HEAD;
@@ -2249,6 +2220,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
2249 } 2220 }
2250 2221
2251 ENCODE_SEQID_OP_TAIL(od->od_stateowner); 2222 ENCODE_SEQID_OP_TAIL(od->od_stateowner);
2223 return nfserr;
2252} 2224}
2253 2225
2254static __be32 2226static __be32
@@ -2443,7 +2415,7 @@ err_no_verf:
2443 return nfserr; 2415 return nfserr;
2444} 2416}
2445 2417
2446static void 2418static __be32
2447nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) 2419nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
2448{ 2420{
2449 ENCODE_HEAD; 2421 ENCODE_HEAD;
@@ -2453,9 +2425,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2453 WRITECINFO(remove->rm_cinfo); 2425 WRITECINFO(remove->rm_cinfo);
2454 ADJUST_ARGS(); 2426 ADJUST_ARGS();
2455 } 2427 }
2428 return nfserr;
2456} 2429}
2457 2430
2458static void 2431static __be32
2459nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) 2432nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
2460{ 2433{
2461 ENCODE_HEAD; 2434 ENCODE_HEAD;
@@ -2466,9 +2439,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2466 WRITECINFO(rename->rn_tinfo); 2439 WRITECINFO(rename->rn_tinfo);
2467 ADJUST_ARGS(); 2440 ADJUST_ARGS();
2468 } 2441 }
2442 return nfserr;
2469} 2443}
2470 2444
2471static void 2445static __be32
2472nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, 2446nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
2473 struct nfsd4_secinfo *secinfo) 2447 struct nfsd4_secinfo *secinfo)
2474{ 2448{
@@ -2532,13 +2506,14 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
2532out: 2506out:
2533 if (exp) 2507 if (exp)
2534 exp_put(exp); 2508 exp_put(exp);
2509 return nfserr;
2535} 2510}
2536 2511
2537/* 2512/*
2538 * The SETATTR encode routine is special -- it always encodes a bitmap, 2513 * The SETATTR encode routine is special -- it always encodes a bitmap,
2539 * regardless of the error status. 2514 * regardless of the error status.
2540 */ 2515 */
2541static void 2516static __be32
2542nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) 2517nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
2543{ 2518{
2544 ENCODE_HEAD; 2519 ENCODE_HEAD;
@@ -2555,9 +2530,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2555 WRITE32(setattr->sa_bmval[1]); 2530 WRITE32(setattr->sa_bmval[1]);
2556 } 2531 }
2557 ADJUST_ARGS(); 2532 ADJUST_ARGS();
2533 return nfserr;
2558} 2534}
2559 2535
2560static void 2536static __be32
2561nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) 2537nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
2562{ 2538{
2563 ENCODE_HEAD; 2539 ENCODE_HEAD;
@@ -2574,9 +2550,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
2574 WRITE32(0); 2550 WRITE32(0);
2575 ADJUST_ARGS(); 2551 ADJUST_ARGS();
2576 } 2552 }
2553 return nfserr;
2577} 2554}
2578 2555
2579static void 2556static __be32
2580nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) 2557nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
2581{ 2558{
2582 ENCODE_HEAD; 2559 ENCODE_HEAD;
@@ -2588,8 +2565,56 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
2588 WRITEMEM(write->wr_verifier.data, 8); 2565 WRITEMEM(write->wr_verifier.data, 8);
2589 ADJUST_ARGS(); 2566 ADJUST_ARGS();
2590 } 2567 }
2568 return nfserr;
2591} 2569}
2592 2570
2571static __be32
2572nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
2573{
2574 return nfserr;
2575}
2576
2577typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
2578
2579static nfsd4_enc nfsd4_enc_ops[] = {
2580 [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
2581 [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
2582 [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
2583 [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
2584 [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
2585 [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
2586 [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
2587 [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
2588 [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
2589 [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
2590 [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
2591 [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
2592 [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
2593 [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
2594 [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
2595 [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
2596 [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
2597 [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
2598 [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
2599 [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
2600 [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
2601 [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
2602 [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
2603 [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
2604 [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
2605 [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
2606 [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
2607 [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
2608 [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
2609 [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
2610 [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
2611 [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
2612 [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
2613 [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
2614 [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
2615 [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
2616};
2617
2593void 2618void
2594nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) 2619nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
2595{ 2620{
@@ -2601,101 +2626,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
2601 statp = p++; /* to be backfilled at the end */ 2626 statp = p++; /* to be backfilled at the end */
2602 ADJUST_ARGS(); 2627 ADJUST_ARGS();
2603 2628
2604 switch (op->opnum) { 2629 if (op->opnum == OP_ILLEGAL)
2605 case OP_ACCESS: 2630 goto status;
2606 nfsd4_encode_access(resp, op->status, &op->u.access); 2631 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
2607 break; 2632 !nfsd4_enc_ops[op->opnum]);
2608 case OP_CLOSE: 2633 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
2609 nfsd4_encode_close(resp, op->status, &op->u.close); 2634status:
2610 break;
2611 case OP_COMMIT:
2612 nfsd4_encode_commit(resp, op->status, &op->u.commit);
2613 break;
2614 case OP_CREATE:
2615 nfsd4_encode_create(resp, op->status, &op->u.create);
2616 break;
2617 case OP_DELEGRETURN:
2618 break;
2619 case OP_GETATTR:
2620 op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
2621 break;
2622 case OP_GETFH:
2623 nfsd4_encode_getfh(resp, op->status, op->u.getfh);
2624 break;
2625 case OP_LINK:
2626 nfsd4_encode_link(resp, op->status, &op->u.link);
2627 break;
2628 case OP_LOCK:
2629 nfsd4_encode_lock(resp, op->status, &op->u.lock);
2630 break;
2631 case OP_LOCKT:
2632 nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
2633 break;
2634 case OP_LOCKU:
2635 nfsd4_encode_locku(resp, op->status, &op->u.locku);
2636 break;
2637 case OP_LOOKUP:
2638 break;
2639 case OP_LOOKUPP:
2640 break;
2641 case OP_NVERIFY:
2642 break;
2643 case OP_OPEN:
2644 nfsd4_encode_open(resp, op->status, &op->u.open);
2645 break;
2646 case OP_OPEN_CONFIRM:
2647 nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
2648 break;
2649 case OP_OPEN_DOWNGRADE:
2650 nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
2651 break;
2652 case OP_PUTFH:
2653 break;
2654 case OP_PUTROOTFH:
2655 break;
2656 case OP_READ:
2657 op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
2658 break;
2659 case OP_READDIR:
2660 op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
2661 break;
2662 case OP_READLINK:
2663 op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
2664 break;
2665 case OP_REMOVE:
2666 nfsd4_encode_remove(resp, op->status, &op->u.remove);
2667 break;
2668 case OP_RENAME:
2669 nfsd4_encode_rename(resp, op->status, &op->u.rename);
2670 break;
2671 case OP_RENEW:
2672 break;
2673 case OP_RESTOREFH:
2674 break;
2675 case OP_SAVEFH:
2676 break;
2677 case OP_SECINFO:
2678 nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo);
2679 break;
2680 case OP_SETATTR:
2681 nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
2682 break;
2683 case OP_SETCLIENTID:
2684 nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
2685 break;
2686 case OP_SETCLIENTID_CONFIRM:
2687 break;
2688 case OP_VERIFY:
2689 break;
2690 case OP_WRITE:
2691 nfsd4_encode_write(resp, op->status, &op->u.write);
2692 break;
2693 case OP_RELEASE_LOCKOWNER:
2694 break;
2695 default:
2696 break;
2697 }
2698
2699 /* 2635 /*
2700 * Note: We write the status directly, instead of using WRITE32(), 2636 * Note: We write the status directly, instead of using WRITE32(),
2701 * since it is already in network byte order. 2637 * since it is already in network byte order.
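The encode side mirrors the decode rewrite: every encoder now returns __be32, encoders that emit no result body collapse into nfsd4_encode_noop, and nfsd4_encode_operation dispatches through nfsd4_enc_ops[] with a BUG_ON asserting the table is fully populated. The status word is reserved before dispatch (statp = p++) and backfilled afterwards. A small sketch of that reserve-then-backfill step, with simplified buffer handling rather than the kernel's resp bookkeeping:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static uint32_t stream[16];

/* Encode one op: opcode word, reserved status word, body, backfilled status.
 * The kernel stores the status directly since __be32 is already big-endian. */
static size_t encode_op(uint32_t opnum, uint32_t status_be)
{
	uint32_t *p = stream;
	uint32_t *statp;

	*p++ = htonl(opnum);
	statp = p++;            /* to be backfilled at the end */

	/* ... the per-op encoder from the table runs here; a noop
	 * encoder just returns the status unchanged ... */

	*statp = status_be;     /* backfill once the body is in place */
	return (size_t)(p - stream);
}

int main(void)
{
	printf("%zu words written\n", encode_op(9, htonl(0)));
	return 0;
}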
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5ac00c4fee91..1955a2702e60 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -310,9 +310,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
310 310
311static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) 311static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
312{ 312{
313 __be32 server_ip; 313 struct sockaddr_in sin = {
314 char *fo_path, c; 314 .sin_family = AF_INET,
315 };
315 int b1, b2, b3, b4; 316 int b1, b2, b3, b4;
317 char c;
318 char *fo_path;
316 319
317 /* sanity check */ 320 /* sanity check */
318 if (size == 0) 321 if (size == 0)
@@ -326,11 +329,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
326 return -EINVAL; 329 return -EINVAL;
327 330
328 /* get ipv4 address */ 331 /* get ipv4 address */
329 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) 332 if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
330 return -EINVAL; 333 return -EINVAL;
331 server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); 334 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
335 return -EINVAL;
336 sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
332 337
333 return nlmsvc_unlock_all_by_ip(server_ip); 338 return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
334} 339}
335 340
336static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) 341static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
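failover_unlock_ip now validates each octet and hands lockd a full struct sockaddr instead of a bare __be32, matching the new nlmsvc_unlock_all_by_ip() signature. A standalone sketch of the parse-and-pack step, with NIPQUAD_FMT written out literally as "%u.%u.%u.%u":

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Parse "a.b.c.d" into a sockaddr_in; 0 on success, -1 on bad input. */
static int parse_ipv4(const char *s, struct sockaddr_in *sin)
{
	unsigned int b1, b2, b3, b4;
	char c;

	/* The trailing %c catches trailing garbage: a clean dotted quad
	 * matches exactly four conversions, never five. */
	if (sscanf(s, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
		return -1;
	if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
		return -1;

	memset(sin, 0, sizeof(*sin));
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
	return 0;
}

int main(void)
{
	struct sockaddr_in sin;

	printf("%d %d\n", parse_ipv4("10.0.0.300", &sin),  /* octet > 255 */
	       parse_ipv4("10.0.0.3", &sin));
	return 0;
}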
@@ -450,22 +455,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
450 int i; 455 int i;
451 int rv; 456 int rv;
452 int len; 457 int len;
453 int npools = nfsd_nrpools(); 458 int npools;
454 int *nthreads; 459 int *nthreads;
455 460
461 mutex_lock(&nfsd_mutex);
462 npools = nfsd_nrpools();
456 if (npools == 0) { 463 if (npools == 0) {
457 /* 464 /*
458 * NFS is shut down. The admin can start it by 465 * NFS is shut down. The admin can start it by
459 * writing to the threads file but NOT the pool_threads 466 * writing to the threads file but NOT the pool_threads
460 * file, sorry. Report zero threads. 467 * file, sorry. Report zero threads.
461 */ 468 */
469 mutex_unlock(&nfsd_mutex);
462 strcpy(buf, "0\n"); 470 strcpy(buf, "0\n");
463 return strlen(buf); 471 return strlen(buf);
464 } 472 }
465 473
466 nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); 474 nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
475 rv = -ENOMEM;
467 if (nthreads == NULL) 476 if (nthreads == NULL)
468 return -ENOMEM; 477 goto out_free;
469 478
470 if (size > 0) { 479 if (size > 0) {
471 for (i = 0; i < npools; i++) { 480 for (i = 0; i < npools; i++) {
@@ -496,14 +505,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
496 mesg += len; 505 mesg += len;
497 } 506 }
498 507
508 mutex_unlock(&nfsd_mutex);
499 return (mesg-buf); 509 return (mesg-buf);
500 510
501out_free: 511out_free:
502 kfree(nthreads); 512 kfree(nthreads);
513 mutex_unlock(&nfsd_mutex);
503 return rv; 514 return rv;
504} 515}
505 516
506static ssize_t write_versions(struct file *file, char *buf, size_t size) 517static ssize_t __write_versions(struct file *file, char *buf, size_t size)
507{ 518{
508 /* 519 /*
509 * Format: 520 * Format:
@@ -566,14 +577,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
566 return len; 577 return len;
567} 578}
568 579
569static ssize_t write_ports(struct file *file, char *buf, size_t size) 580static ssize_t write_versions(struct file *file, char *buf, size_t size)
581{
582 ssize_t rv;
583
584 mutex_lock(&nfsd_mutex);
585 rv = __write_versions(file, buf, size);
586 mutex_unlock(&nfsd_mutex);
587 return rv;
588}
589
590static ssize_t __write_ports(struct file *file, char *buf, size_t size)
570{ 591{
571 if (size == 0) { 592 if (size == 0) {
572 int len = 0; 593 int len = 0;
573 lock_kernel(); 594
574 if (nfsd_serv) 595 if (nfsd_serv)
575 len = svc_xprt_names(nfsd_serv, buf, 0); 596 len = svc_xprt_names(nfsd_serv, buf, 0);
576 unlock_kernel();
577 return len; 597 return len;
578 } 598 }
579 /* Either a single 'fd' number is written, in which 599 /* Either a single 'fd' number is written, in which
@@ -603,9 +623,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
603 /* Decrease the count, but don't shut down 623 /* Decrease the count, but don't shut down
604 * the service 624 * the service
605 */ 625 */
606 lock_kernel();
607 nfsd_serv->sv_nrthreads--; 626 nfsd_serv->sv_nrthreads--;
608 unlock_kernel();
609 } 627 }
610 return err < 0 ? err : 0; 628 return err < 0 ? err : 0;
611 } 629 }
@@ -614,10 +632,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
614 int len = 0; 632 int len = 0;
615 if (!toclose) 633 if (!toclose)
616 return -ENOMEM; 634 return -ENOMEM;
617 lock_kernel();
618 if (nfsd_serv) 635 if (nfsd_serv)
619 len = svc_sock_names(buf, nfsd_serv, toclose); 636 len = svc_sock_names(buf, nfsd_serv, toclose);
620 unlock_kernel();
621 if (len >= 0) 637 if (len >= 0)
622 lockd_down(); 638 lockd_down();
623 kfree(toclose); 639 kfree(toclose);
@@ -655,7 +671,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
655 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { 671 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
656 if (port == 0) 672 if (port == 0)
657 return -EINVAL; 673 return -EINVAL;
658 lock_kernel();
659 if (nfsd_serv) { 674 if (nfsd_serv) {
660 xprt = svc_find_xprt(nfsd_serv, transport, 675 xprt = svc_find_xprt(nfsd_serv, transport,
661 AF_UNSPEC, port); 676 AF_UNSPEC, port);
@@ -666,13 +681,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
666 } else 681 } else
667 err = -ENOTCONN; 682 err = -ENOTCONN;
668 } 683 }
669 unlock_kernel();
670 return err < 0 ? err : 0; 684 return err < 0 ? err : 0;
671 } 685 }
672 } 686 }
673 return -EINVAL; 687 return -EINVAL;
674} 688}
675 689
690static ssize_t write_ports(struct file *file, char *buf, size_t size)
691{
692 ssize_t rv;
693
694 mutex_lock(&nfsd_mutex);
695 rv = __write_ports(file, buf, size);
696 mutex_unlock(&nfsd_mutex);
697 return rv;
698}
699
700
676int nfsd_max_blksize; 701int nfsd_max_blksize;
677 702
678static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) 703static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +716,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
691 if (bsize > NFSSVC_MAXBLKSIZE) 716 if (bsize > NFSSVC_MAXBLKSIZE)
692 bsize = NFSSVC_MAXBLKSIZE; 717 bsize = NFSSVC_MAXBLKSIZE;
693 bsize &= ~(1024-1); 718 bsize &= ~(1024-1);
694 lock_kernel(); 719 mutex_lock(&nfsd_mutex);
695 if (nfsd_serv && nfsd_serv->sv_nrthreads) { 720 if (nfsd_serv && nfsd_serv->sv_nrthreads) {
696 unlock_kernel(); 721 mutex_unlock(&nfsd_mutex);
697 return -EBUSY; 722 return -EBUSY;
698 } 723 }
699 nfsd_max_blksize = bsize; 724 nfsd_max_blksize = bsize;
700 unlock_kernel(); 725 mutex_unlock(&nfsd_mutex);
701 } 726 }
702 return sprintf(buf, "%d\n", nfsd_max_blksize); 727 return sprintf(buf, "%d\n", nfsd_max_blksize);
703} 728}
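write_maxblksize refuses to change the block size while threads are running (nfsd_mutex now covering that check), caps the request at NFSSVC_MAXBLKSIZE, and rounds down to a 1 KiB multiple with bsize &= ~(1024-1). A sketch of the clamp-and-round arithmetic; the lower bound and the NFSSVC_MAXBLKSIZE value are assumptions for illustration:

#include <stdio.h>

#define NFSSVC_MAXBLKSIZE (1024 * 1024)  /* assumed value for the sketch */

/* Clamp a requested block size and round it down to a 1 KiB multiple. */
static int clamp_blksize(int bsize)
{
	if (bsize < 1024)
		bsize = 1024;            /* assumed lower bound */
	if (bsize > NFSSVC_MAXBLKSIZE)
		bsize = NFSSVC_MAXBLKSIZE;
	return bsize & ~(1024 - 1);      /* clear the low 10 bits */
}

int main(void)
{
	/* 4097 -> 4096, 100 -> 1024, 1<<30 -> 1048576 */
	printf("%d %d %d\n", clamp_blksize(4097), clamp_blksize(100),
	       clamp_blksize(1 << 30));
	return 0;
}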
@@ -705,16 +730,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
705#ifdef CONFIG_NFSD_V4 730#ifdef CONFIG_NFSD_V4
706extern time_t nfs4_leasetime(void); 731extern time_t nfs4_leasetime(void);
707 732
708static ssize_t write_leasetime(struct file *file, char *buf, size_t size) 733static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
709{ 734{
710 /* if size > 10 seconds, call 735 /* if size > 10 seconds, call
711 * nfs4_reset_lease() then write out the new lease (seconds) as reply 736 * nfs4_reset_lease() then write out the new lease (seconds) as reply
712 */ 737 */
713 char *mesg = buf; 738 char *mesg = buf;
714 int rv; 739 int rv, lease;
715 740
716 if (size > 0) { 741 if (size > 0) {
717 int lease; 742 if (nfsd_serv)
743 return -EBUSY;
718 rv = get_int(&mesg, &lease); 744 rv = get_int(&mesg, &lease);
719 if (rv) 745 if (rv)
720 return rv; 746 return rv;
@@ -726,24 +752,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
726 return strlen(buf); 752 return strlen(buf);
727} 753}
728 754
729static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) 755static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
756{
757 ssize_t rv;
758
759 mutex_lock(&nfsd_mutex);
760 rv = __write_leasetime(file, buf, size);
761 mutex_unlock(&nfsd_mutex);
762 return rv;
763}
764
765extern char *nfs4_recoverydir(void);
766
767static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
730{ 768{
731 char *mesg = buf; 769 char *mesg = buf;
732 char *recdir; 770 char *recdir;
733 int len, status; 771 int len, status;
734 772
735 if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') 773 if (size > 0) {
736 return -EINVAL; 774 if (nfsd_serv)
737 buf[size-1] = 0; 775 return -EBUSY;
776 if (size > PATH_MAX || buf[size-1] != '\n')
777 return -EINVAL;
778 buf[size-1] = 0;
738 779
739 recdir = mesg; 780 recdir = mesg;
740 len = qword_get(&mesg, recdir, size); 781 len = qword_get(&mesg, recdir, size);
741 if (len <= 0) 782 if (len <= 0)
742 return -EINVAL; 783 return -EINVAL;
743 784
744 status = nfs4_reset_recoverydir(recdir); 785 status = nfs4_reset_recoverydir(recdir);
786 }
787 sprintf(buf, "%s\n", nfs4_recoverydir());
745 return strlen(buf); 788 return strlen(buf);
746} 789}
790
791static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
792{
793 ssize_t rv;
794
795 mutex_lock(&nfsd_mutex);
796 rv = __write_recoverydir(file, buf, size);
797 mutex_unlock(&nfsd_mutex);
798 return rv;
799}
800
747#endif 801#endif
748 802
749/*----------------------------------------------------------------------------*/ 803/*----------------------------------------------------------------------------*/
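The recurring shape in nfsctl.c: each handler splits into an unlocked __write_X body plus a thin write_X wrapper that brackets it with nfsd_mutex, replacing the old lock_kernel()/unlock_kernel() pairs with a real mutex. A generic userspace sketch of the wrapper idiom, using a pthread mutex and a hypothetical setting in place of the kernel pieces:

#include <stdio.h>
#include <pthread.h>
#include <sys/types.h>

static pthread_mutex_t cfg_mutex = PTHREAD_MUTEX_INITIALIZER;
static char recoverydir[64] = "/var/lib/nfs/v4recovery";

/* The unlocked body: callers must hold cfg_mutex. */
static ssize_t __write_recoverydir(char *buf, size_t size)
{
	if (size > 0)
		snprintf(recoverydir, sizeof(recoverydir), "%.*s",
			 (int)size, buf);
	return snprintf(buf, 64, "%s\n", recoverydir);
}

/* The exported entry point: take the lock, delegate, release. */
static ssize_t write_recoverydir(char *buf, size_t size)
{
	ssize_t rv;

	pthread_mutex_lock(&cfg_mutex);
	rv = __write_recoverydir(buf, size);
	pthread_mutex_unlock(&cfg_mutex);
	return rv;
}

int main(void)
{
	char buf[64] = "";

	write_recoverydir(buf, 0);  /* size 0: report the current value */
	fputs(buf, stdout);
	return 0;
}

Keeping the lock in one tiny wrapper makes the "must hold nfsd_mutex" rule auditable at a glance, which matters once write_leasetime and write_recoverydir start checking nfsd_serv under that same lock.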
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 100ae5641162..f45451eb1e38 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
176 if (IS_ERR(exp)) 176 if (IS_ERR(exp))
177 return nfserrno(PTR_ERR(exp)); 177 return nfserrno(PTR_ERR(exp));
178 178
179 error = nfsd_setuser_and_check_port(rqstp, exp); 179 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
180 if (error) 180 /* Elevate privileges so that the lack of 'r' or 'x'
181 goto out; 181 * permission on some parent directory will
182 * not stop exportfs_decode_fh from being able
183 * to reconnect a directory into the dentry cache.
184 * The same problem can affect "SUBTREECHECK" exports,
185 * but as nfsd_acceptable depends on correct
186 * access control settings being in effect, we cannot
187 * fix that case easily.
188 */
189 current->cap_effective =
190 cap_raise_nfsd_set(current->cap_effective,
191 current->cap_permitted);
192 } else {
193 error = nfsd_setuser_and_check_port(rqstp, exp);
194 if (error)
195 goto out;
196 }
182 197
183 /* 198 /*
184 * Look up the dentry using the NFS file handle. 199 * Look up the dentry using the NFS file handle.
@@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
215 goto out; 230 goto out;
216 } 231 }
217 232
233 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
234 error = nfsd_setuser_and_check_port(rqstp, exp);
235 if (error) {
236 dput(dentry);
237 goto out;
238 }
239 }
240
218 if (S_ISDIR(dentry->d_inode->i_mode) && 241 if (S_ISDIR(dentry->d_inode->i_mode) &&
219 (dentry->d_flags & DCACHE_DISCONNECTED)) { 242 (dentry->d_flags & DCACHE_DISCONNECTED)) {
220 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 243 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
279 if (error) 302 if (error)
280 goto out; 303 goto out;
281 304
282 if (!(access & MAY_LOCK)) { 305 if (!(access & NFSD_MAY_LOCK)) {
283 /* 306 /*
284 * pseudoflavor restrictions are not enforced on NLM, 307 * pseudoflavor restrictions are not enforced on NLM,
285 * which clients virtually always use auth_sys for, 308 * which clients virtually always use auth_sys for,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6cfc96a12483..0766f95d236a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
65 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 65 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
66 66
67 fh_copy(&resp->fh, &argp->fh); 67 fh_copy(&resp->fh, &argp->fh);
68 nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); 68 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
69 return nfsd_return_attrs(nfserr, resp); 69 return nfsd_return_attrs(nfserr, resp);
70} 70}
71 71
@@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
215 SVCFH_fmt(dirfhp), argp->len, argp->name); 215 SVCFH_fmt(dirfhp), argp->len, argp->name);
216 216
217 /* First verify the parent file handle */ 217 /* First verify the parent file handle */
218 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC); 218 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
219 if (nfserr) 219 if (nfserr)
220 goto done; /* must fh_put dirfhp even on error */ 220 goto done; /* must fh_put dirfhp even on error */
221 221
222 /* Check for MAY_WRITE in nfsd_create if necessary */ 222 /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
223 223
224 nfserr = nfserr_acces; 224 nfserr = nfserr_acces;
225 if (!argp->len) 225 if (!argp->len)
@@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
281 nfserr = nfsd_permission(rqstp, 281 nfserr = nfsd_permission(rqstp,
282 newfhp->fh_export, 282 newfhp->fh_export,
283 newfhp->fh_dentry, 283 newfhp->fh_dentry,
284 MAY_WRITE|MAY_LOCAL_ACCESS); 284 NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
285 if (nfserr && nfserr != nfserr_rofs) 285 if (nfserr && nfserr != nfserr_rofs)
286 goto out_unlock; 286 goto out_unlock;
287 } 287 }
@@ -614,6 +614,7 @@ nfserrno (int errno)
614#endif 614#endif
615 { nfserr_stale, -ESTALE }, 615 { nfserr_stale, -ESTALE },
616 { nfserr_jukebox, -ETIMEDOUT }, 616 { nfserr_jukebox, -ETIMEDOUT },
617 { nfserr_jukebox, -ERESTARTSYS },
617 { nfserr_dropit, -EAGAIN }, 618 { nfserr_dropit, -EAGAIN },
618 { nfserr_dropit, -ENOMEM }, 619 { nfserr_dropit, -ENOMEM },
619 { nfserr_badname, -ESRCH }, 620 { nfserr_badname, -ESRCH },
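The nfserrno() hunk adds -ERESTARTSYS to the errno translation table, so a signal-interrupted nfsd operation surfaces to the client as nfserr_jukebox ("try again later") instead of leaking a kernel-internal errno. A sketch of the first-match table lookup; the status values are placeholders, and ERESTARTSYS is defined by hand because it is not a userspace errno:

#include <stdio.h>
#include <errno.h>

#define ERESTARTSYS 512  /* kernel-internal, not exported to userspace */

/* Placeholder status codes; real values come from the NFS protocol. */
enum { nfserr_io = 5, nfserr_stale = 70, nfserr_jukebox = 10008 };

static const struct { int nfserr; int syserr; } nfs_errtbl[] = {
	{ nfserr_stale,   -ESTALE },
	{ nfserr_jukebox, -ETIMEDOUT },
	{ nfserr_jukebox, -ERESTARTSYS },  /* the newly added mapping */
};

static int nfserrno(int err)
{
	size_t i;

	for (i = 0; i < sizeof(nfs_errtbl) / sizeof(nfs_errtbl[0]); i++)
		if (nfs_errtbl[i].syserr == err)
			return nfs_errtbl[i].nfserr;
	return nfserr_io;  /* fallback for unmapped errors */
}

int main(void)
{
	printf("%d\n", nfserrno(-ERESTARTSYS));  /* -> 10008 (jukebox) */
	return 0;
}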
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 941041f4b136..80292ff5e924 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
21#include <linux/smp_lock.h> 21#include <linux/smp_lock.h>
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/fs_struct.h> 23#include <linux/fs_struct.h>
24#include <linux/kthread.h>
24 25
25#include <linux/sunrpc/types.h> 26#include <linux/sunrpc/types.h>
26#include <linux/sunrpc/stats.h> 27#include <linux/sunrpc/stats.h>
@@ -36,28 +37,38 @@
36 37
37#define NFSDDBG_FACILITY NFSDDBG_SVC 38#define NFSDDBG_FACILITY NFSDDBG_SVC
38 39
39/* these signals will be delivered to an nfsd thread
40 * when handling a request
41 */
42#define ALLOWED_SIGS (sigmask(SIGKILL))
43/* these signals will be delivered to an nfsd thread
44 * when not handling a request. i.e. when waiting
45 */
46#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
47/* if the last thread dies with SIGHUP, then the exports table is
48 * left unchanged ( like 2.4-{0-9} ). Any other signal will clear
49 * the exports table (like 2.2).
50 */
51#define SIG_NOCLEAN SIGHUP
52
53extern struct svc_program nfsd_program; 40extern struct svc_program nfsd_program;
54static void nfsd(struct svc_rqst *rqstp); 41static int nfsd(void *vrqstp);
55struct timeval nfssvc_boot; 42struct timeval nfssvc_boot;
56 struct svc_serv *nfsd_serv;
57static atomic_t nfsd_busy; 43static atomic_t nfsd_busy;
58static unsigned long nfsd_last_call; 44static unsigned long nfsd_last_call;
59static DEFINE_SPINLOCK(nfsd_call_lock); 45static DEFINE_SPINLOCK(nfsd_call_lock);
60 46
47/*
48 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
49 * of the svc_serv struct. In particular ->sv_nrthreads, but also to some
50 * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
51 *
52 * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
53 * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
54 * of nfsd threads must exist and each must be listed in ->sp_all_threads in each
55 * entry of ->sv_pools[].
56 *
57 * Transitions of the thread count between zero and non-zero are of particular
58 * interest since the svc_serv needs to be created and initialized at that
59 * point, or freed.
60 *
61 * Finally, the nfsd_mutex also protects some of the global variables that are
62 * accessed when nfsd starts and that are settable via the write_* routines in
63 * nfsctl.c. In particular:
64 *
65 * user_recovery_dirname
66 * user_lease_time
67 * nfsd_versions
68 */
69DEFINE_MUTEX(nfsd_mutex);
70struct svc_serv *nfsd_serv;
71
61#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 72#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
62static struct svc_stat nfsd_acl_svcstats; 73static struct svc_stat nfsd_acl_svcstats;
63static struct svc_version * nfsd_acl_version[] = { 74static struct svc_version * nfsd_acl_version[] = {
@@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change)
145 156
146int nfsd_nrthreads(void) 157int nfsd_nrthreads(void)
147{ 158{
148 if (nfsd_serv == NULL) 159 int rv = 0;
149 return 0; 160 mutex_lock(&nfsd_mutex);
150 else 161 if (nfsd_serv)
151 return nfsd_serv->sv_nrthreads; 162 rv = nfsd_serv->sv_nrthreads;
163 mutex_unlock(&nfsd_mutex);
164 return rv;
152} 165}
153 166
154static int killsig; /* signal that was used to kill last nfsd */
155static void nfsd_last_thread(struct svc_serv *serv) 167static void nfsd_last_thread(struct svc_serv *serv)
156{ 168{
157 /* When last nfsd thread exits we need to do some clean-up */ 169 /* When last nfsd thread exits we need to do some clean-up */
@@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
162 nfsd_racache_shutdown(); 174 nfsd_racache_shutdown();
163 nfs4_state_shutdown(); 175 nfs4_state_shutdown();
164 176
165 printk(KERN_WARNING "nfsd: last server has exited\n"); 177 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
166 if (killsig != SIG_NOCLEAN) { 178 "cache\n");
167 printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); 179 nfsd_export_flush();
168 nfsd_export_flush();
169 }
170} 180}
171 181
172void nfsd_reset_versions(void) 182void nfsd_reset_versions(void)
@@ -190,13 +200,14 @@ void nfsd_reset_versions(void)
190 } 200 }
191} 201}
192 202
203
193int nfsd_create_serv(void) 204int nfsd_create_serv(void)
194{ 205{
195 int err = 0; 206 int err = 0;
196 lock_kernel(); 207
208 WARN_ON(!mutex_is_locked(&nfsd_mutex));
197 if (nfsd_serv) { 209 if (nfsd_serv) {
198 svc_get(nfsd_serv); 210 svc_get(nfsd_serv);
199 unlock_kernel();
200 return 0; 211 return 0;
201 } 212 }
202 if (nfsd_max_blksize == 0) { 213 if (nfsd_max_blksize == 0) {
@@ -217,13 +228,11 @@ int nfsd_create_serv(void)
217 } 228 }
218 229
219 atomic_set(&nfsd_busy, 0); 230 atomic_set(&nfsd_busy, 0);
220 nfsd_serv = svc_create_pooled(&nfsd_program, 231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
221 nfsd_max_blksize, 232 nfsd_last_thread, nfsd, THIS_MODULE);
222 nfsd_last_thread,
223 nfsd, SIG_NOCLEAN, THIS_MODULE);
224 if (nfsd_serv == NULL) 233 if (nfsd_serv == NULL)
225 err = -ENOMEM; 234 err = -ENOMEM;
226 unlock_kernel(); 235
227 do_gettimeofday(&nfssvc_boot); /* record boot time */ 236 do_gettimeofday(&nfssvc_boot); /* record boot time */
228 return err; 237 return err;
229} 238}
@@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
282 int tot = 0; 291 int tot = 0;
283 int err = 0; 292 int err = 0;
284 293
294 WARN_ON(!mutex_is_locked(&nfsd_mutex));
295
285 if (nfsd_serv == NULL || n <= 0) 296 if (nfsd_serv == NULL || n <= 0)
286 return 0; 297 return 0;
287 298
@@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
316 nthreads[0] = 1; 327 nthreads[0] = 1;
317 328
318 /* apply the new numbers */ 329 /* apply the new numbers */
319 lock_kernel();
320 svc_get(nfsd_serv); 330 svc_get(nfsd_serv);
321 for (i = 0; i < n; i++) { 331 for (i = 0; i < n; i++) {
322 err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], 332 err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
325 break; 335 break;
326 } 336 }
327 svc_destroy(nfsd_serv); 337 svc_destroy(nfsd_serv);
328 unlock_kernel();
329 338
330 return err; 339 return err;
331} 340}
@@ -334,8 +343,8 @@ int
334nfsd_svc(unsigned short port, int nrservs) 343nfsd_svc(unsigned short port, int nrservs)
335{ 344{
336 int error; 345 int error;
337 346
338 lock_kernel(); 347 mutex_lock(&nfsd_mutex);
339 dprintk("nfsd: creating service\n"); 348 dprintk("nfsd: creating service\n");
340 error = -EINVAL; 349 error = -EINVAL;
341 if (nrservs <= 0) 350 if (nrservs <= 0)
@@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs)
363 failure: 372 failure:
364 svc_destroy(nfsd_serv); /* Release server */ 373 svc_destroy(nfsd_serv); /* Release server */
365 out: 374 out:
366 unlock_kernel(); 375 mutex_unlock(&nfsd_mutex);
367 return error; 376 return error;
368} 377}
369 378
@@ -391,18 +400,17 @@ update_thread_usage(int busy_threads)
391/* 400/*
392 * This is the NFS server kernel thread 401 * This is the NFS server kernel thread
393 */ 402 */
394static void 403static int
395nfsd(struct svc_rqst *rqstp) 404nfsd(void *vrqstp)
396{ 405{
406 struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
397 struct fs_struct *fsp; 407 struct fs_struct *fsp;
398 int err; 408 int err, preverr = 0;
399 sigset_t shutdown_mask, allowed_mask;
400 409
401 /* Lock module and set up kernel thread */ 410 /* Lock module and set up kernel thread */
402 lock_kernel(); 411 mutex_lock(&nfsd_mutex);
403 daemonize("nfsd");
404 412
405 /* After daemonize() this kernel thread shares current->fs 413 /* At this point, the thread shares current->fs
406 * with the init process. We need to create files with a 414 * with the init process. We need to create files with a
407 * umask of 0 instead of init's umask. */ 415 * umask of 0 instead of init's umask. */
408 fsp = copy_fs_struct(current->fs); 416 fsp = copy_fs_struct(current->fs);
@@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp)
414 current->fs = fsp; 422 current->fs = fsp;
415 current->fs->umask = 0; 423 current->fs->umask = 0;
416 424
417 siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); 425 /*
418 siginitsetinv(&allowed_mask, ALLOWED_SIGS); 426 * thread is spawned with all signals set to SIG_IGN, re-enable
427 * the ones that will bring down the thread
428 */
429 allow_signal(SIGKILL);
430 allow_signal(SIGHUP);
431 allow_signal(SIGINT);
432 allow_signal(SIGQUIT);
419 433
420 nfsdstats.th_cnt++; 434 nfsdstats.th_cnt++;
421 435 mutex_unlock(&nfsd_mutex);
422 rqstp->rq_task = current;
423
424 unlock_kernel();
425 436
426 /* 437 /*
427 * We want less throttling in balance_dirty_pages() so that nfs to 438 * We want less throttling in balance_dirty_pages() so that nfs to
@@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp)
435 * The main request loop 446 * The main request loop
436 */ 447 */
437 for (;;) { 448 for (;;) {
438 /* Block all but the shutdown signals */
439 sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
440
441 /* 449 /*
442 * Find a socket with data available and call its 450 * Find a socket with data available and call its
443 * recvfrom routine. 451 * recvfrom routine.
444 */ 452 */
445 while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) 453 while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
446 ; 454 ;
447 if (err < 0) 455 if (err == -EINTR)
448 break; 456 break;
457 else if (err < 0) {
458 if (err != preverr) {
459 printk(KERN_WARNING "%s: unexpected error "
460 "from svc_recv (%d)\n", __func__, -err);
461 preverr = err;
462 }
463 schedule_timeout_uninterruptible(HZ);
464 continue;
465 }
466
449 update_thread_usage(atomic_read(&nfsd_busy)); 467 update_thread_usage(atomic_read(&nfsd_busy));
450 atomic_inc(&nfsd_busy); 468 atomic_inc(&nfsd_busy);
451 469
452 /* Lock the export hash tables for reading. */ 470 /* Lock the export hash tables for reading. */
453 exp_readlock(); 471 exp_readlock();
454 472
455 /* Process request with signals blocked. */
456 sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
457
458 svc_process(rqstp); 473 svc_process(rqstp);
459 474
460 /* Unlock export hash tables */ 475 /* Unlock export hash tables */
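The rewritten receive loop treats -EINTR as the shutdown path and everything else as transient: an unexpected error is logged only when it differs from the last one seen (preverr), then the thread sleeps for a second (schedule_timeout_uninterruptible(HZ)) before retrying, so a persistent transport failure can neither flood the log nor spin the CPU. A userspace sketch of that log-on-change-plus-backoff shape, with a stubbed receive function:

#include <stdio.h>
#include <errno.h>
#include <unistd.h>

/* Stub transport: fail with ENOMEM three times, then deliver a request. */
static int fake_recv(int call)
{
	return call < 3 ? -ENOMEM : 0;
}

int main(void)
{
	int err, preverr = 0;

	for (int call = 0; ; call++) {
		err = fake_recv(call);
		if (err == -EINTR)
			break;                   /* shutdown signal: exit loop */
		if (err < 0) {
			if (err != preverr) {    /* log only a new error value */
				fprintf(stderr, "unexpected error (%d)\n", -err);
				preverr = err;
			}
			sleep(1);                /* back off instead of spinning */
			continue;
		}
		printf("request received, processing\n");
		break;                           /* sketch: one request is enough */
	}
	return 0;
}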
@@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp)
463 atomic_dec(&nfsd_busy); 478 atomic_dec(&nfsd_busy);
464 } 479 }
465 480
466 if (err != -EINTR) {
467 printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
468 } else {
469 unsigned int signo;
470
471 for (signo = 1; signo <= _NSIG; signo++)
472 if (sigismember(&current->pending.signal, signo) &&
473 !sigismember(&current->blocked, signo))
474 break;
475 killsig = signo;
476 }
477 /* Clear signals before calling svc_exit_thread() */ 481 /* Clear signals before calling svc_exit_thread() */
478 flush_signals(current); 482 flush_signals(current);
479 483
480 lock_kernel(); 484 mutex_lock(&nfsd_mutex);
481
482 nfsdstats.th_cnt --; 485 nfsdstats.th_cnt --;
483 486
484out: 487out:
@@ -486,8 +489,9 @@ out:
486 svc_exit_thread(rqstp); 489 svc_exit_thread(rqstp);
487 490
488 /* Release module */ 491 /* Release module */
489 unlock_kernel(); 492 mutex_unlock(&nfsd_mutex);
490 module_put_and_exit(0); 493 module_put_and_exit(0);
494 return 0;
491} 495}
492 496
493static __be32 map_new_errors(u32 vers, __be32 nfserr) 497static __be32 map_new_errors(u32 vers, __be32 nfserr)
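Taken together, the nfssvc.c hunks convert nfsd from a daemonize()-based thread to the kthread API: the entry point becomes int (*)(void *), signal masks give way to allow_signal() (kthreads start with every signal ignored), and the thread returns a status instead of falling off a void function. An untested kernel-module-style sketch of that pattern follows; the include list is a best guess for this era, and the worker only reports signals where nfsd would break out of its request loop:

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *tsk;

static int worker(void *data)
{
	/* Re-enable the signals that should be visible to this thread;
	 * nfsd also allows SIGHUP and SIGQUIT. */
	allow_signal(SIGKILL);
	allow_signal(SIGINT);

	while (!kthread_should_stop()) {
		if (signal_pending(current)) {
			/* nfsd breaks out of its loop at this point */
			printk(KERN_INFO "worker: signal received\n");
			flush_signals(current);
		}
		msleep_interruptible(1000);     /* stand-in for svc_recv() */
	}
	return 0;       /* kthreads return int, unlike the old void nfsd() */
}

static int __init demo_init(void)
{
	tsk = kthread_run(worker, NULL, "demo_worker");
	return IS_ERR(tsk) ? PTR_ERR(tsk) : 0;
}

static void __exit demo_exit(void)
{
	kthread_stop(tsk);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");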
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a3a291f771f4..0f4481e0502d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -144,7 +144,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
144 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 144 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
145 145
146 /* Obtain dentry and export. */ 146 /* Obtain dentry and export. */
147 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); 147 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
148 if (err) 148 if (err)
149 return err; 149 return err;
150 150
@@ -262,14 +262,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
262{ 262{
263 struct dentry *dentry; 263 struct dentry *dentry;
264 struct inode *inode; 264 struct inode *inode;
265 int accmode = MAY_SATTR; 265 int accmode = NFSD_MAY_SATTR;
266 int ftype = 0; 266 int ftype = 0;
267 __be32 err; 267 __be32 err;
268 int host_err; 268 int host_err;
269 int size_change = 0; 269 int size_change = 0;
270 270
271 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 271 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
272 accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; 272 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
273 if (iap->ia_valid & ATTR_SIZE) 273 if (iap->ia_valid & ATTR_SIZE)
274 ftype = S_IFREG; 274 ftype = S_IFREG;
275 275
@@ -331,7 +331,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
331 */ 331 */
332 if (iap->ia_valid & ATTR_SIZE) { 332 if (iap->ia_valid & ATTR_SIZE) {
333 if (iap->ia_size < inode->i_size) { 333 if (iap->ia_size < inode->i_size) {
334 err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); 334 err = nfsd_permission(rqstp, fhp->fh_export, dentry,
335 NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
335 if (err) 336 if (err)
336 goto out; 337 goto out;
337 } 338 }
@@ -462,7 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
462 unsigned int flags = 0; 463 unsigned int flags = 0;
463 464
464 /* Get inode */ 465 /* Get inode */
465 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); 466 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
466 if (error) 467 if (error)
467 return error; 468 return error;
468 469
@@ -563,20 +564,20 @@ struct accessmap {
563 int how; 564 int how;
564}; 565};
565static struct accessmap nfs3_regaccess[] = { 566static struct accessmap nfs3_regaccess[] = {
566 { NFS3_ACCESS_READ, MAY_READ }, 567 { NFS3_ACCESS_READ, NFSD_MAY_READ },
567 { NFS3_ACCESS_EXECUTE, MAY_EXEC }, 568 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
568 { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, 569 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
569 { NFS3_ACCESS_EXTEND, MAY_WRITE }, 570 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
570 571
571 { 0, 0 } 572 { 0, 0 }
572}; 573};
573 574
574static struct accessmap nfs3_diraccess[] = { 575static struct accessmap nfs3_diraccess[] = {
575 { NFS3_ACCESS_READ, MAY_READ }, 576 { NFS3_ACCESS_READ, NFSD_MAY_READ },
576 { NFS3_ACCESS_LOOKUP, MAY_EXEC }, 577 { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
577 { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, 578 { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
578 { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, 579 { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
579 { NFS3_ACCESS_DELETE, MAY_REMOVE }, 580 { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
580 581
581 { 0, 0 } 582 { 0, 0 }
582}; 583};
@@ -589,10 +590,10 @@ static struct accessmap nfs3_anyaccess[] = {
589 * mainly at mode bits, and we make sure to ignore read-only 590 * mainly at mode bits, and we make sure to ignore read-only
590 * filesystem checks 591 * filesystem checks
591 */ 592 */
592 { NFS3_ACCESS_READ, MAY_READ }, 593 { NFS3_ACCESS_READ, NFSD_MAY_READ },
593 { NFS3_ACCESS_EXECUTE, MAY_EXEC }, 594 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
594 { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS }, 595 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
595 { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS }, 596 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
596 597
597 { 0, 0 } 598 { 0, 0 }
598}; 599};
@@ -606,7 +607,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
606 u32 query, result = 0, sresult = 0; 607 u32 query, result = 0, sresult = 0;
607 __be32 error; 608 __be32 error;
608 609
609 error = fh_verify(rqstp, fhp, 0, MAY_NOP); 610 error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
610 if (error) 611 if (error)
611 goto out; 612 goto out;
612 613
@@ -678,7 +679,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
678 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 679 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
679 * in case a chmod has now revoked permission. 680 * in case a chmod has now revoked permission.
680 */ 681 */
681 err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); 682 err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
682 if (err) 683 if (err)
683 goto out; 684 goto out;
684 685
@@ -689,7 +690,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
689 * or any access when mandatory locking enabled 690 * or any access when mandatory locking enabled
690 */ 691 */
691 err = nfserr_perm; 692 err = nfserr_perm;
692 if (IS_APPEND(inode) && (access & MAY_WRITE)) 693 if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
693 goto out; 694 goto out;
694 /* 695 /*
695 * We must ignore files (but only files) which might have mandatory 696 * We must ignore files (but only files) which might have mandatory
@@ -706,14 +707,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
706 * Check to see if there are any leases on this file. 707 * Check to see if there are any leases on this file.
707 * This may block while leases are broken. 708 * This may block while leases are broken.
708 */ 709 */
709 host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); 710 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
710 if (host_err == -EWOULDBLOCK) 711 if (host_err == -EWOULDBLOCK)
711 host_err = -ETIMEDOUT; 712 host_err = -ETIMEDOUT;
712 if (host_err) /* NOMEM or WOULDBLOCK */ 713 if (host_err) /* NOMEM or WOULDBLOCK */
713 goto out_nfserr; 714 goto out_nfserr;
714 715
715 if (access & MAY_WRITE) { 716 if (access & NFSD_MAY_WRITE) {
716 if (access & MAY_READ) 717 if (access & NFSD_MAY_READ)
717 flags = O_RDWR|O_LARGEFILE; 718 flags = O_RDWR|O_LARGEFILE;
718 else 719 else
719 flags = O_WRONLY|O_LARGEFILE; 720 flags = O_WRONLY|O_LARGEFILE;
@@ -1069,12 +1070,12 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1069 1070
1070 if (file) { 1071 if (file) {
1071 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1072 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1072 MAY_READ|MAY_OWNER_OVERRIDE); 1073 NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
1073 if (err) 1074 if (err)
1074 goto out; 1075 goto out;
1075 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1076 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1076 } else { 1077 } else {
1077 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); 1078 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1078 if (err) 1079 if (err)
1079 goto out; 1080 goto out;
1080 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1081 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
@@ -1098,13 +1099,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1098 1099
1099 if (file) { 1100 if (file) {
1100 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1101 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1101 MAY_WRITE|MAY_OWNER_OVERRIDE); 1102 NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
1102 if (err) 1103 if (err)
1103 goto out; 1104 goto out;
1104 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, 1105 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
1105 stablep); 1106 stablep);
1106 } else { 1107 } else {
1107 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); 1108 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1108 if (err) 1109 if (err)
1109 goto out; 1110 goto out;
1110 1111
@@ -1136,7 +1137,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1136 if ((u64)count > ~(u64)offset) 1137 if ((u64)count > ~(u64)offset)
1137 return nfserr_inval; 1138 return nfserr_inval;
1138 1139
1139 if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) 1140 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1141 if (err)
1140 return err; 1142 return err;
1141 if (EX_ISSYNC(fhp->fh_export)) { 1143 if (EX_ISSYNC(fhp->fh_export)) {
1142 if (file->f_op && file->f_op->fsync) { 1144 if (file->f_op && file->f_op->fsync) {
@@ -1197,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1197 if (isdotent(fname, flen)) 1199 if (isdotent(fname, flen))
1198 goto out; 1200 goto out;
1199 1201
1200 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1202 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1201 if (err) 1203 if (err)
1202 goto out; 1204 goto out;
1203 1205
@@ -1248,36 +1250,34 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1248 iap->ia_mode = 0; 1250 iap->ia_mode = 0;
1249 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; 1251 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
1250 1252
1253 err = nfserr_inval;
1254 if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
1255 printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
1256 type);
1257 goto out;
1258 }
1259
1260 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1261 if (host_err)
1262 goto out_nfserr;
1263
1251 /* 1264 /*
1252 * Get the dir op function pointer. 1265 * Get the dir op function pointer.
1253 */ 1266 */
1254 err = 0; 1267 err = 0;
1255 switch (type) { 1268 switch (type) {
1256 case S_IFREG: 1269 case S_IFREG:
1257 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1258 if (host_err)
1259 goto out_nfserr;
1260 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1270 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1261 break; 1271 break;
1262 case S_IFDIR: 1272 case S_IFDIR:
1263 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1264 if (host_err)
1265 goto out_nfserr;
1266 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1273 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1267 break; 1274 break;
1268 case S_IFCHR: 1275 case S_IFCHR:
1269 case S_IFBLK: 1276 case S_IFBLK:
1270 case S_IFIFO: 1277 case S_IFIFO:
1271 case S_IFSOCK: 1278 case S_IFSOCK:
1272 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1273 if (host_err)
1274 goto out_nfserr;
1275 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1279 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1276 break; 1280 break;
1277 default:
1278 printk("nfsd: bad file type %o in nfsd_create\n", type);
1279 host_err = -EINVAL;
1280 goto out_nfserr;
1281 } 1281 }
1282 if (host_err < 0) { 1282 if (host_err < 0) {
1283 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1283 mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1289,7 +1289,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1289 write_inode_now(dchild->d_inode, 1); 1289 write_inode_now(dchild->d_inode, 1);
1290 } 1290 }
1291 1291
1292
1293 err2 = nfsd_create_setattr(rqstp, resfhp, iap); 1292 err2 = nfsd_create_setattr(rqstp, resfhp, iap);
1294 if (err2) 1293 if (err2)
1295 err = err2; 1294 err = err2;
@@ -1334,7 +1333,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1334 goto out; 1333 goto out;
1335 if (!(iap->ia_valid & ATTR_MODE)) 1334 if (!(iap->ia_valid & ATTR_MODE))
1336 iap->ia_mode = 0; 1335 iap->ia_mode = 0;
1337 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1336 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1338 if (err) 1337 if (err)
1339 goto out; 1338 goto out;
1340 1339
@@ -1471,7 +1470,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1471 __be32 err; 1470 __be32 err;
1472 int host_err; 1471 int host_err;
1473 1472
1474 err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); 1473 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1475 if (err) 1474 if (err)
1476 goto out; 1475 goto out;
1477 1476
@@ -1526,7 +1525,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1526 if (isdotent(fname, flen)) 1525 if (isdotent(fname, flen))
1527 goto out; 1526 goto out;
1528 1527
1529 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1528 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1530 if (err) 1529 if (err)
1531 goto out; 1530 goto out;
1532 fh_lock(fhp); 1531 fh_lock(fhp);
@@ -1591,10 +1590,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1591 __be32 err; 1590 __be32 err;
1592 int host_err; 1591 int host_err;
1593 1592
1594 err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); 1593 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
1595 if (err) 1594 if (err)
1596 goto out; 1595 goto out;
1597 err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); 1596 err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
1598 if (err) 1597 if (err)
1599 goto out; 1598 goto out;
1600 1599
@@ -1661,10 +1660,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1661 __be32 err; 1660 __be32 err;
1662 int host_err; 1661 int host_err;
1663 1662
1664 err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); 1663 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
1665 if (err) 1664 if (err)
1666 goto out; 1665 goto out;
1667 err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); 1666 err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
1668 if (err) 1667 if (err)
1669 goto out; 1668 goto out;
1670 1669
@@ -1768,7 +1767,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1768 err = nfserr_acces; 1767 err = nfserr_acces;
1769 if (!flen || isdotent(fname, flen)) 1768 if (!flen || isdotent(fname, flen))
1770 goto out; 1769 goto out;
1771 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); 1770 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
1772 if (err) 1771 if (err)
1773 goto out; 1772 goto out;
1774 1773
@@ -1834,7 +1833,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1834 struct file *file; 1833 struct file *file;
1835 loff_t offset = *offsetp; 1834 loff_t offset = *offsetp;
1836 1835
1837 err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); 1836 err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
1838 if (err) 1837 if (err)
1839 goto out; 1838 goto out;
1840 1839
@@ -1875,7 +1874,7 @@ out:
1875__be32 1874__be32
1876nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) 1875nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
1877{ 1876{
1878 __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP); 1877 __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
1879 if (!err && vfs_statfs(fhp->fh_dentry,stat)) 1878 if (!err && vfs_statfs(fhp->fh_dentry,stat))
1880 err = nfserr_io; 1879 err = nfserr_io;
1881 return err; 1880 return err;
@@ -1896,18 +1895,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1896 struct inode *inode = dentry->d_inode; 1895 struct inode *inode = dentry->d_inode;
1897 int err; 1896 int err;
1898 1897
1899 if (acc == MAY_NOP) 1898 if (acc == NFSD_MAY_NOP)
1900 return 0; 1899 return 0;
1901#if 0 1900#if 0
1902 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", 1901 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
1903 acc, 1902 acc,
1904 (acc & MAY_READ)? " read" : "", 1903 (acc & NFSD_MAY_READ)? " read" : "",
1905 (acc & MAY_WRITE)? " write" : "", 1904 (acc & NFSD_MAY_WRITE)? " write" : "",
1906 (acc & MAY_EXEC)? " exec" : "", 1905 (acc & NFSD_MAY_EXEC)? " exec" : "",
1907 (acc & MAY_SATTR)? " sattr" : "", 1906 (acc & NFSD_MAY_SATTR)? " sattr" : "",
1908 (acc & MAY_TRUNC)? " trunc" : "", 1907 (acc & NFSD_MAY_TRUNC)? " trunc" : "",
1909 (acc & MAY_LOCK)? " lock" : "", 1908 (acc & NFSD_MAY_LOCK)? " lock" : "",
1910 (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", 1909 (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
1911 inode->i_mode, 1910 inode->i_mode,
1912 IS_IMMUTABLE(inode)? " immut" : "", 1911 IS_IMMUTABLE(inode)? " immut" : "",
1913 IS_APPEND(inode)? " append" : "", 1912 IS_APPEND(inode)? " append" : "",
@@ -1920,18 +1919,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1920 * system. But if it is IRIX doing check on write-access for a 1919 * system. But if it is IRIX doing check on write-access for a
1921 * device special file, we ignore rofs. 1920 * device special file, we ignore rofs.
1922 */ 1921 */
1923 if (!(acc & MAY_LOCAL_ACCESS)) 1922 if (!(acc & NFSD_MAY_LOCAL_ACCESS))
1924 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { 1923 if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
1925 if (exp_rdonly(rqstp, exp) || 1924 if (exp_rdonly(rqstp, exp) ||
1926 __mnt_is_readonly(exp->ex_path.mnt)) 1925 __mnt_is_readonly(exp->ex_path.mnt))
1927 return nfserr_rofs; 1926 return nfserr_rofs;
1928 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) 1927 if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
1929 return nfserr_perm; 1928 return nfserr_perm;
1930 } 1929 }
1931 if ((acc & MAY_TRUNC) && IS_APPEND(inode)) 1930 if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
1932 return nfserr_perm; 1931 return nfserr_perm;
1933 1932
1934 if (acc & MAY_LOCK) { 1933 if (acc & NFSD_MAY_LOCK) {
1935 /* If we cannot rely on authentication in NLM requests, 1934 /* If we cannot rely on authentication in NLM requests,
1936 * just allow locks, otherwise require read permission, or 1935 * just allow locks, otherwise require read permission, or
1937 * ownership 1936 * ownership
@@ -1939,7 +1938,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1939 if (exp->ex_flags & NFSEXP_NOAUTHNLM) 1938 if (exp->ex_flags & NFSEXP_NOAUTHNLM)
1940 return 0; 1939 return 0;
1941 else 1940 else
1942 acc = MAY_READ | MAY_OWNER_OVERRIDE; 1941 acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
1943 } 1942 }
1944 /* 1943 /*
1945 * The file owner always gets access permission for accesses that 1944 * The file owner always gets access permission for accesses that
@@ -1955,15 +1954,16 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1955 * We must trust the client to do permission checking - using "ACCESS" 1954 * We must trust the client to do permission checking - using "ACCESS"
1956 * with NFSv3. 1955 * with NFSv3.
1957 */ 1956 */
1958 if ((acc & MAY_OWNER_OVERRIDE) && 1957 if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
1959 inode->i_uid == current->fsuid) 1958 inode->i_uid == current->fsuid)
1960 return 0; 1959 return 0;
1961 1960
1961 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
1962 err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); 1962 err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
1963 1963
1964 /* Allow read access to binaries even when mode 111 */ 1964 /* Allow read access to binaries even when mode 111 */
1965 if (err == -EACCES && S_ISREG(inode->i_mode) && 1965 if (err == -EACCES && S_ISREG(inode->i_mode) &&
1966 acc == (MAY_READ | MAY_OWNER_OVERRIDE)) 1966 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
1967 err = permission(inode, MAY_EXEC, NULL); 1967 err = permission(inode, MAY_EXEC, NULL);
1968 1968
1969 return err? nfserrno(err) : 0; 1969 return err? nfserrno(err) : 0;
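The vfs.c hunks above make two separable changes: the permission flags move into the NFSD_MAY_* namespace (defined in the nfsd.h hunk further down), and nfsd_create() now rejects bad file types and takes the mount write reference once, ahead of the switch, instead of duplicating mnt_want_write() in every case. A condensed sketch of the resulting nfsd_create() flow, with variables and labels as in the original and error paths abbreviated:

	err = nfserr_inval;
	if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type))
		goto out;			/* reject bad types up front */

	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
	if (host_err)
		goto out_nfserr;		/* one write reference for all cases */

	switch (type) {
	case S_IFREG:
		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
		break;
	case S_IFDIR:
		host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
		break;
	default:	/* S_IFCHR, S_IFBLK, S_IFIFO, S_IFSOCK */
		host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
		break;
	}
	if (host_err < 0)
		mnt_drop_write(fhp->fh_export->ex_path.mnt);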
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c6455dadb21b..9c2ac5c0ef5c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -918,12 +918,12 @@ struct file_lock {
918 struct list_head fl_link; /* doubly linked list of all locks */ 918 struct list_head fl_link; /* doubly linked list of all locks */
919 struct list_head fl_block; /* circular list of blocked processes */ 919 struct list_head fl_block; /* circular list of blocked processes */
920 fl_owner_t fl_owner; 920 fl_owner_t fl_owner;
921 unsigned char fl_flags;
922 unsigned char fl_type;
921 unsigned int fl_pid; 923 unsigned int fl_pid;
922 struct pid *fl_nspid; 924 struct pid *fl_nspid;
923 wait_queue_head_t fl_wait; 925 wait_queue_head_t fl_wait;
924 struct file *fl_file; 926 struct file *fl_file;
925 unsigned char fl_flags;
926 unsigned char fl_type;
927 loff_t fl_start; 927 loff_t fl_start;
928 loff_t fl_end; 928 loff_t fl_end;
929 929
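Moving the two unsigned char fields up next to the fl_owner pointer lets them occupy bytes that were previously padding, which is where the "save space on 64 bit builds" in the commit summary comes from. A minimal userspace demonstration of the effect (field names and layout are illustrative, not the real struct file_lock):

	#include <stdio.h>

	struct before {				/* chars trail the pointers */
		void *owner;
		unsigned int pid;
		void *nspid;
		unsigned char flags, type;
		long long start;
	};
	struct after {				/* chars tucked behind owner */
		void *owner;
		unsigned char flags, type;
		unsigned int pid;
		void *nspid;
		long long start;
	};

	int main(void)
	{
		printf("before=%zu after=%zu\n",
		       sizeof(struct before), sizeof(struct after));
		return 0;	/* prints before=40 after=32 on LP64 */
	}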
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 102d928f7206..dbb87ab282e8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -200,10 +200,12 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
200 * Server-side lock handling 200 * Server-side lock handling
201 */ 201 */
202__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, 202__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
203 struct nlm_lock *, int, struct nlm_cookie *); 203 struct nlm_host *, struct nlm_lock *, int,
204 struct nlm_cookie *);
204__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); 205__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
205__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, 206__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
206 struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); 207 struct nlm_host *, struct nlm_lock *,
208 struct nlm_lock *, struct nlm_cookie *);
207__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); 209__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
208unsigned long nlmsvc_retry_blocked(void); 210unsigned long nlmsvc_retry_blocked(void);
209void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, 211void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
@@ -224,7 +226,7 @@ void nlmsvc_invalidate_all(void);
224 * Cluster failover support 226 * Cluster failover support
225 */ 227 */
226int nlmsvc_unlock_all_by_sb(struct super_block *sb); 228int nlmsvc_unlock_all_by_sb(struct super_block *sb);
227int nlmsvc_unlock_all_by_ip(__be32 server_addr); 229int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
228 230
229static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) 231static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
230{ 232{
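Both server-side entry points grow an explicit struct nlm_host argument so that callers which have already resolved the host (svc4proc.c and svcproc.c in this series) pass it down, rather than having nlmsvc_lock() and nlmsvc_testlock() repeat the nlmsvc_lookup_host() they just performed; nlmsvc_unlock_all_by_ip() takes a struct sockaddr * so failover-by-IP is no longer tied to IPv4 addresses. A caller now looks roughly like this (sketch modeled on the proc code; surrounding details assumed):

	host = nlmsvc_lookup_host(rqstp, ...);		/* resolved once */
	...
	resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
				   argp->block, &argp->cookie);
	nlm_release_host(host);				/* NULL-safe per this series */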
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8726491de154..ea0366769484 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -65,9 +65,6 @@
65#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 65#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
66#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 66#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
67#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 67#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
68#define NFS4_ACE_OWNER 0x00000080
69#define NFS4_ACE_GROUP 0x00000100
70#define NFS4_ACE_EVERYONE 0x00000200
71 68
72#define NFS4_ACE_READ_DATA 0x00000001 69#define NFS4_ACE_READ_DATA 0x00000001
73#define NFS4_ACE_LIST_DIRECTORY 0x00000001 70#define NFS4_ACE_LIST_DIRECTORY 0x00000001
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 41d30c9c9de6..a2861d95ecc3 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -28,20 +28,20 @@
28#define NFSD_SUPPORTED_MINOR_VERSION 0 28#define NFSD_SUPPORTED_MINOR_VERSION 0
29 29
30/* 30/*
31 * Special flags for nfsd_permission. These must be different from MAY_READ, 31 * Flags for nfsd_permission
32 * MAY_WRITE, and MAY_EXEC.
33 */ 32 */
34#define MAY_NOP 0 33#define NFSD_MAY_NOP 0
35#define MAY_SATTR 8 34#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
36#define MAY_TRUNC 16 35#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
37#define MAY_LOCK 32 36#define NFSD_MAY_READ 4 /* == MAY_READ */
38#define MAY_OWNER_OVERRIDE 64 37#define NFSD_MAY_SATTR 8
39#define MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 38#define NFSD_MAY_TRUNC 16
40#if (MAY_SATTR | MAY_TRUNC | MAY_LOCK | MAY_OWNER_OVERRIDE | MAY_LOCAL_ACCESS) & (MAY_READ | MAY_WRITE | MAY_EXEC) 39#define NFSD_MAY_LOCK 32
41# error "please use a different value for MAY_SATTR or MAY_TRUNC or MAY_LOCK or MAY_LOCAL_ACCESS or MAY_OWNER_OVERRIDE." 40#define NFSD_MAY_OWNER_OVERRIDE 64
42#endif 41#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
43#define MAY_CREATE (MAY_EXEC|MAY_WRITE) 42
44#define MAY_REMOVE (MAY_EXEC|MAY_WRITE|MAY_TRUNC) 43#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
44#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
45 45
46/* 46/*
47 * Callback function for readdir 47 * Callback function for readdir
@@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
54extern struct svc_program nfsd_program; 54extern struct svc_program nfsd_program;
55extern struct svc_version nfsd_version2, nfsd_version3, 55extern struct svc_version nfsd_version2, nfsd_version3,
56 nfsd_version4; 56 nfsd_version4;
57extern struct mutex nfsd_mutex;
57extern struct svc_serv *nfsd_serv; 58extern struct svc_serv *nfsd_serv;
58 59
59extern struct seq_operations nfs_exports_op; 60extern struct seq_operations nfs_exports_op;
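The renamed flags keep the low three bits numerically identical to the VFS MAY_* values, which is what lets nfsd_permission() above mask them straight into permission(); the old #error guard disappears because the equality is now relied on rather than forbidden. A build-time check would make the assumption explicit (a sketch, not part of the patch):

	#include <linux/fs.h>		/* MAY_EXEC, MAY_WRITE, MAY_READ */
	#include <linux/nfsd/nfsd.h>	/* NFSD_MAY_* */

	#if (NFSD_MAY_EXEC != MAY_EXEC) || (NFSD_MAY_WRITE != MAY_WRITE) || \
	    (NFSD_MAY_READ != MAY_READ)
	#error "NFSD_MAY_{READ,WRITE,EXEC} must equal MAY_{READ,WRITE,EXEC}"
	#endif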
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index db348f749376..d0fe2e378452 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -98,8 +98,6 @@ struct nfs4_callback {
98 u32 cb_ident; 98 u32 cb_ident;
99 /* RPC client info */ 99 /* RPC client info */
100 atomic_t cb_set; /* successful CB_NULL call */ 100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_program cb_program;
102 struct rpc_stat cb_stat;
103 struct rpc_clnt * cb_client; 101 struct rpc_clnt * cb_client;
104}; 102};
105 103
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index a10f1fb0bf7c..e7bbdba474d5 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -51,6 +51,9 @@ struct krb5_ctx {
51 51
52extern spinlock_t krb5_seq_lock; 52extern spinlock_t krb5_seq_lock;
53 53
54/* The length of the Kerberos GSS token header */
55#define GSS_KRB5_TOK_HDR_LEN (16)
56
54#define KG_TOK_MIC_MSG 0x0101 57#define KG_TOK_MIC_MSG 0x0101
55#define KG_TOK_WRAP_MSG 0x0201 58#define KG_TOK_WRAP_MSG 0x0201
56 59
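GSS_KRB5_TOK_HDR_LEN names the 16 bytes of rfc 1964, section 1.2.1 header that every MIC and WRAP token carries after the mechanism framing; the 8-byte checksum sits immediately after it, which is why the krb5 hunks below address fields as ptr[0..15] and ptr + GSS_KRB5_TOK_HDR_LEN. Layout for reference (an illustrative struct; the code itself works on raw byte offsets):

	struct krb5_tok_hdr {			/* illustrative only */
		unsigned char tok_id[2];	/* ptr[0..1]  KG_TOK_{MIC,WRAP}_MSG */
		unsigned char sgn_alg[2];	/* ptr[2..3]  SGN_ALG_DES_MAC_MD5 */
		unsigned char seal_alg[2];	/* ptr[4..5]  SEAL_ALG_{NONE,DES} */
		unsigned char filler[2];	/* ptr[6..7]  0xff 0xff */
		unsigned char snd_seq[8];	/* ptr[8..15] encrypted sequence no. */
	};	/* 16 bytes == GSS_KRB5_TOK_HDR_LEN; checksum follows at ptr + 16 */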
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4b54c5fdcfd9..dc69068d94c7 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -22,7 +22,7 @@
22/* 22/*
23 * This is the RPC server thread function prototype 23 * This is the RPC server thread function prototype
24 */ 24 */
25typedef void (*svc_thread_fn)(struct svc_rqst *); 25typedef int (*svc_thread_fn)(void *);
26 26
27/* 27/*
28 * 28 *
@@ -80,7 +80,6 @@ struct svc_serv {
80 struct module * sv_module; /* optional module to count when 80 struct module * sv_module; /* optional module to count when
81 * adding threads */ 81 * adding threads */
82 svc_thread_fn sv_function; /* main function for threads */ 82 svc_thread_fn sv_function; /* main function for threads */
83 int sv_kill_signal; /* signal to kill threads */
84}; 83};
85 84
86/* 85/*
@@ -388,8 +387,8 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
388 struct svc_pool *pool); 387 struct svc_pool *pool);
389void svc_exit_thread(struct svc_rqst *); 388void svc_exit_thread(struct svc_rqst *);
390struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, 389struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
391 void (*shutdown)(struct svc_serv*), 390 void (*shutdown)(struct svc_serv*), svc_thread_fn,
392 svc_thread_fn, int sig, struct module *); 391 struct module *);
393int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); 392int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
394void svc_destroy(struct svc_serv *); 393void svc_destroy(struct svc_serv *);
395int svc_process(struct svc_rqst *); 394int svc_process(struct svc_rqst *);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 05eb4664d0dd..ef2e3a20bf3b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -72,7 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
72 */ 72 */
73struct svc_rdma_op_ctxt { 73struct svc_rdma_op_ctxt {
74 struct svc_rdma_op_ctxt *read_hdr; 74 struct svc_rdma_op_ctxt *read_hdr;
75 struct list_head free_list; 75 int hdr_count;
76 struct xdr_buf arg; 76 struct xdr_buf arg;
77 struct list_head dto_q; 77 struct list_head dto_q;
78 enum ib_wr_opcode wr_op; 78 enum ib_wr_opcode wr_op;
@@ -86,6 +86,31 @@ struct svc_rdma_op_ctxt {
86 struct page *pages[RPCSVC_MAXPAGES]; 86 struct page *pages[RPCSVC_MAXPAGES];
87}; 87};
88 88
89/*
 90 * NFS requests are mapped on the client side by the chunk lists in
 91 * the RPCRDMA header. During the fetching of the RPC from the client
 92 * and the writing of the reply to the client, the memory in the
 93 * client and the memory in the server must be mapped as contiguous
 94 * vaddr/len for access by the hardware. These data structures keep
95 * these mappings.
96 *
97 * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
98 * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
99 * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
100 * mapping of the reply.
101 */
102struct svc_rdma_chunk_sge {
103 int start; /* sge no for this chunk */
104 int count; /* sge count for this chunk */
105};
106struct svc_rdma_req_map {
107 unsigned long count;
108 union {
109 struct kvec sge[RPCSVC_MAXPAGES];
110 struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
111 };
112};
113
89#define RDMACTXT_F_LAST_CTXT 2 114#define RDMACTXT_F_LAST_CTXT 2
90 115
91struct svcxprt_rdma { 116struct svcxprt_rdma {
@@ -93,7 +118,6 @@ struct svcxprt_rdma {
93 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ 118 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
94 struct list_head sc_accept_q; /* Conn. waiting accept */ 119 struct list_head sc_accept_q; /* Conn. waiting accept */
95 int sc_ord; /* RDMA read limit */ 120 int sc_ord; /* RDMA read limit */
96 wait_queue_head_t sc_read_wait;
97 int sc_max_sge; 121 int sc_max_sge;
98 122
99 int sc_sq_depth; /* Depth of SQ */ 123 int sc_sq_depth; /* Depth of SQ */
@@ -104,12 +128,8 @@ struct svcxprt_rdma {
104 128
105 struct ib_pd *sc_pd; 129 struct ib_pd *sc_pd;
106 130
131 atomic_t sc_dma_used;
107 atomic_t sc_ctxt_used; 132 atomic_t sc_ctxt_used;
108 struct list_head sc_ctxt_free;
109 int sc_ctxt_cnt;
110 int sc_ctxt_bump;
111 int sc_ctxt_max;
112 spinlock_t sc_ctxt_lock;
113 struct list_head sc_rq_dto_q; 133 struct list_head sc_rq_dto_q;
114 spinlock_t sc_rq_dto_lock; 134 spinlock_t sc_rq_dto_lock;
115 struct ib_qp *sc_qp; 135 struct ib_qp *sc_qp;
@@ -173,6 +193,8 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
173extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 193extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
174extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); 194extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
175extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); 195extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
196extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
197extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
176extern void svc_sq_reap(struct svcxprt_rdma *); 198extern void svc_sq_reap(struct svcxprt_rdma *);
177extern void svc_rq_reap(struct svcxprt_rdma *); 199extern void svc_rq_reap(struct svcxprt_rdma *);
178extern struct svc_xprt_class svc_rdma_class; 200extern struct svc_xprt_class svc_rdma_class;
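The new svc_rdma_req_map replaces the old trick of borrowing a pair of svc_rdma_op_ctxt structures as scratch space: the union lets one cache object be read either as a kvec array (the server-side mapping of the RPC data) or as a chunk_sge array (the read-list grouping of those entries). Typical use in the receive path further down (sketch):

	struct svc_rdma_req_map *rpl_map = svc_rdma_get_req_map();
	struct svc_rdma_req_map *chl_map = svc_rdma_get_req_map();

	/* rpl_map->sge[] holds vaddr/len pairs; chl_map->ch[] groups them */
	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
				    rpl_map, chl_map, ch_count, byte_count);
	...
	svc_rdma_put_req_map(rpl_map);
	svc_rdma_put_req_map(chl_map);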
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index f3431a7e33da..4de8bcf26fa7 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,12 +5,12 @@
5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o 5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
6 6
7auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ 7auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
8 gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o 8 gss_mech_switch.o svcauth_gss.o
9 9
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o 10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
14 14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o 15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16 16
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 1d52308ca324..c93fca204558 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -83,8 +83,6 @@ out:
83 return ret; 83 return ret;
84} 84}
85 85
86EXPORT_SYMBOL(krb5_encrypt);
87
88u32 86u32
89krb5_decrypt( 87krb5_decrypt(
90 struct crypto_blkcipher *tfm, 88 struct crypto_blkcipher *tfm,
@@ -118,8 +116,6 @@ out:
118 return ret; 116 return ret;
119} 117}
120 118
121EXPORT_SYMBOL(krb5_decrypt);
122
123static int 119static int
124checksummer(struct scatterlist *sg, void *data) 120checksummer(struct scatterlist *sg, void *data)
125{ 121{
@@ -161,8 +157,6 @@ out:
161 return err ? GSS_S_FAILURE : 0; 157 return err ? GSS_S_FAILURE : 0;
162} 158}
163 159
164EXPORT_SYMBOL(make_checksum);
165
166struct encryptor_desc { 160struct encryptor_desc {
167 u8 iv[8]; /* XXX hard-coded blocksize */ 161 u8 iv[8]; /* XXX hard-coded blocksize */
168 struct blkcipher_desc desc; 162 struct blkcipher_desc desc;
@@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
262 return ret; 256 return ret;
263} 257}
264 258
265EXPORT_SYMBOL(gss_encrypt_xdr_buf);
266
267struct decryptor_desc { 259struct decryptor_desc {
268 u8 iv[8]; /* XXX hard-coded blocksize */ 260 u8 iv[8]; /* XXX hard-coded blocksize */
269 struct blkcipher_desc desc; 261 struct blkcipher_desc desc;
@@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
334 326
335 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); 327 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
336} 328}
337
338EXPORT_SYMBOL(gss_decrypt_xdr_buf);
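With the Makefile hunk above moving gss_krb5_crypto.o out of auth_rpcgss.o and into rpcsec_gss_krb5.o, krb5_encrypt(), krb5_decrypt(), make_checksum() and the xdr_buf helpers now link into the same module as their only callers, so the EXPORT_SYMBOL lines can go: the symbols resolve at module link time, and an out-of-module user would now fail at modpost. In-module callers are untouched, for example in gss_krb5_seal.c:

	/* same module, no export needed: */
	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
			 md5cksum.data, md5cksum.len))
		return GSS_S_FAILURE;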
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 5f1d36dfbcf7..b8f42ef7178e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
78 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; 78 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
79 char cksumdata[16]; 79 char cksumdata[16];
80 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 80 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
81 unsigned char *ptr, *krb5_hdr, *msg_start; 81 unsigned char *ptr, *msg_start;
82 s32 now; 82 s32 now;
83 u32 seq_send; 83 u32 seq_send;
84 84
@@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
87 87
88 now = get_seconds(); 88 now = get_seconds();
89 89
90 token->len = g_token_size(&ctx->mech_used, 24); 90 token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
91 91
92 ptr = token->data; 92 ptr = token->data;
93 g_make_token_header(&ctx->mech_used, 24, &ptr); 93 g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
94 94
95 *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); 95 /* ptr now at header described in rfc 1964, section 1.2.1: */
96 *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); 96 ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
97 ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
97 98
98 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ 99 msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
99 krb5_hdr = ptr - 2;
100 msg_start = krb5_hdr + 24;
101 100
102 *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); 101 *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
103 memset(krb5_hdr + 4, 0xff, 4); 102 memset(ptr + 4, 0xff, 4);
104 103
105 if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum)) 104 if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
106 return GSS_S_FAILURE; 105 return GSS_S_FAILURE;
107 106
108 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, 107 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
109 md5cksum.data, md5cksum.len)) 108 md5cksum.data, md5cksum.len))
110 return GSS_S_FAILURE; 109 return GSS_S_FAILURE;
111 110
112 memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); 111 memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
113 112
114 spin_lock(&krb5_seq_lock); 113 spin_lock(&krb5_seq_lock);
115 seq_send = ctx->seq_send++; 114 seq_send = ctx->seq_send++;
116 spin_unlock(&krb5_seq_lock); 115 spin_unlock(&krb5_seq_lock);
117 116
118 if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, 117 if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
119 seq_send, krb5_hdr + 16, krb5_hdr + 8)) 118 seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
119 ptr + 8))
120 return GSS_S_FAILURE; 120 return GSS_S_FAILURE;
121 121
122 return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; 122 return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
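The seal rewrite does not move any token field; it only stops advancing ptr past the 2-byte token id, so the old krb5_hdr base and the new ptr base denote the same address. The offset translation, for checking the hunk above (comment-only sketch):

	/* old: krb5_hdr = ptr_after_tokid - 2;  new: ptr stays at header start
	 *   sequence no.  krb5_hdr + 8   ==  ptr + 8
	 *   checksum      krb5_hdr + 16  ==  ptr + GSS_KRB5_TOK_HDR_LEN
	 *   message       krb5_hdr + 24  ==  ptr + GSS_KRB5_TOK_HDR_LEN + 8
	 */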
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index d91a5d004803..066ec73c84d6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
92 read_token->len)) 92 read_token->len))
93 return GSS_S_DEFECTIVE_TOKEN; 93 return GSS_S_DEFECTIVE_TOKEN;
94 94
95 if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || 95 if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
96 (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) 96 (ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
97 return GSS_S_DEFECTIVE_TOKEN; 97 return GSS_S_DEFECTIVE_TOKEN;
98 98
99 /* XXX sanity-check bodysize?? */ 99 /* XXX sanity-check bodysize?? */
100 100
101 signalg = ptr[0] + (ptr[1] << 8); 101 signalg = ptr[2] + (ptr[3] << 8);
102 if (signalg != SGN_ALG_DES_MAC_MD5) 102 if (signalg != SGN_ALG_DES_MAC_MD5)
103 return GSS_S_DEFECTIVE_TOKEN; 103 return GSS_S_DEFECTIVE_TOKEN;
104 104
105 sealalg = ptr[2] + (ptr[3] << 8); 105 sealalg = ptr[4] + (ptr[5] << 8);
106 if (sealalg != SEAL_ALG_NONE) 106 if (sealalg != SEAL_ALG_NONE)
107 return GSS_S_DEFECTIVE_TOKEN; 107 return GSS_S_DEFECTIVE_TOKEN;
108 108
109 if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) 109 if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
110 return GSS_S_DEFECTIVE_TOKEN; 110 return GSS_S_DEFECTIVE_TOKEN;
111 111
112 if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum)) 112 if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
113 return GSS_S_FAILURE; 113 return GSS_S_FAILURE;
114 114
115 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) 115 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
116 return GSS_S_FAILURE; 116 return GSS_S_FAILURE;
117 117
118 if (memcmp(md5cksum.data + 8, ptr + 14, 8)) 118 if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
119 return GSS_S_BAD_SIG; 119 return GSS_S_BAD_SIG;
120 120
121 /* it got through unscathed. Make sure the context is unexpired */ 121 /* it got through unscathed. Make sure the context is unexpired */
@@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
127 127
128 /* do sequencing checks */ 128 /* do sequencing checks */
129 129
130 if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum)) 130 if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
131 return GSS_S_FAILURE; 131 return GSS_S_FAILURE;
132 132
133 if ((ctx->initiate && direction != 0xff) || 133 if ((ctx->initiate && direction != 0xff) ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index b00b1b426301..ae8e69b59c4c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -87,8 +87,8 @@ out:
87 return 0; 87 return 0;
88} 88}
89 89
90static inline void 90static void
91make_confounder(char *p, int blocksize) 91make_confounder(char *p, u32 conflen)
92{ 92{
93 static u64 i = 0; 93 static u64 i = 0;
94 u64 *q = (u64 *)p; 94 u64 *q = (u64 *)p;
@@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize)
102 * uniqueness would mean worrying about atomicity and rollover, and I 102 * uniqueness would mean worrying about atomicity and rollover, and I
103 * don't care enough. */ 103 * don't care enough. */
104 104
105 BUG_ON(blocksize != 8); 105 /* initialize to random value */
106 *q = i++; 106 if (i == 0) {
107 i = random32();
108 i = (i << 32) | random32();
109 }
110
111 switch (conflen) {
112 case 16:
113 *q++ = i++;
114 /* fall through */
115 case 8:
116 *q++ = i++;
117 break;
118 default:
119 BUG();
120 }
107} 121}
108 122
109/* Assumptions: the head and tail of inbuf are ours to play with. 123/* Assumptions: the head and tail of inbuf are ours to play with.
@@ -122,7 +136,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
122 char cksumdata[16]; 136 char cksumdata[16];
123 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 137 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
124 int blocksize = 0, plainlen; 138 int blocksize = 0, plainlen;
125 unsigned char *ptr, *krb5_hdr, *msg_start; 139 unsigned char *ptr, *msg_start;
126 s32 now; 140 s32 now;
127 int headlen; 141 int headlen;
128 struct page **tmp_pages; 142 struct page **tmp_pages;
@@ -149,26 +163,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
149 buf->len += headlen; 163 buf->len += headlen;
150 BUG_ON((buf->len - offset - headlen) % blocksize); 164 BUG_ON((buf->len - offset - headlen) % blocksize);
151 165
152 g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr); 166 g_make_token_header(&kctx->mech_used,
167 GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
153 168
154 169
155 *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); 170 /* ptr now at header described in rfc 1964, section 1.2.1: */
156 *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); 171 ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
172 ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
157 173
158 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ 174 msg_start = ptr + 24;
159 krb5_hdr = ptr - 2;
160 msg_start = krb5_hdr + 24;
161 175
162 *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); 176 *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
163 memset(krb5_hdr + 4, 0xff, 4); 177 memset(ptr + 4, 0xff, 4);
164 *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES); 178 *(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
165 179
166 make_confounder(msg_start, blocksize); 180 make_confounder(msg_start, blocksize);
167 181
168 /* XXXJBF: UGH!: */ 182 /* XXXJBF: UGH!: */
169 tmp_pages = buf->pages; 183 tmp_pages = buf->pages;
170 buf->pages = pages; 184 buf->pages = pages;
171 if (make_checksum("md5", krb5_hdr, 8, buf, 185 if (make_checksum("md5", ptr, 8, buf,
172 offset + headlen - blocksize, &md5cksum)) 186 offset + headlen - blocksize, &md5cksum))
173 return GSS_S_FAILURE; 187 return GSS_S_FAILURE;
174 buf->pages = tmp_pages; 188 buf->pages = tmp_pages;
@@ -176,7 +190,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
176 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, 190 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
177 md5cksum.data, md5cksum.len)) 191 md5cksum.data, md5cksum.len))
178 return GSS_S_FAILURE; 192 return GSS_S_FAILURE;
179 memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); 193 memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
180 194
181 spin_lock(&krb5_seq_lock); 195 spin_lock(&krb5_seq_lock);
182 seq_send = kctx->seq_send++; 196 seq_send = kctx->seq_send++;
@@ -185,7 +199,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
185 /* XXX would probably be more efficient to compute checksum 199 /* XXX would probably be more efficient to compute checksum
186 * and encrypt at the same time: */ 200 * and encrypt at the same time: */
187 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, 201 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
188 seq_send, krb5_hdr + 16, krb5_hdr + 8))) 202 seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
189 return GSS_S_FAILURE; 203 return GSS_S_FAILURE;
190 204
191 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, 205 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
@@ -219,38 +233,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
219 buf->len - offset)) 233 buf->len - offset))
220 return GSS_S_DEFECTIVE_TOKEN; 234 return GSS_S_DEFECTIVE_TOKEN;
221 235
222 if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || 236 if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
223 (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) 237 (ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
224 return GSS_S_DEFECTIVE_TOKEN; 238 return GSS_S_DEFECTIVE_TOKEN;
225 239
226 /* XXX sanity-check bodysize?? */ 240 /* XXX sanity-check bodysize?? */
227 241
228 /* get the sign and seal algorithms */ 242 /* get the sign and seal algorithms */
229 243
230 signalg = ptr[0] + (ptr[1] << 8); 244 signalg = ptr[2] + (ptr[3] << 8);
231 if (signalg != SGN_ALG_DES_MAC_MD5) 245 if (signalg != SGN_ALG_DES_MAC_MD5)
232 return GSS_S_DEFECTIVE_TOKEN; 246 return GSS_S_DEFECTIVE_TOKEN;
233 247
234 sealalg = ptr[2] + (ptr[3] << 8); 248 sealalg = ptr[4] + (ptr[5] << 8);
235 if (sealalg != SEAL_ALG_DES) 249 if (sealalg != SEAL_ALG_DES)
236 return GSS_S_DEFECTIVE_TOKEN; 250 return GSS_S_DEFECTIVE_TOKEN;
237 251
238 if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) 252 if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
239 return GSS_S_DEFECTIVE_TOKEN; 253 return GSS_S_DEFECTIVE_TOKEN;
240 254
241 if (gss_decrypt_xdr_buf(kctx->enc, buf, 255 if (gss_decrypt_xdr_buf(kctx->enc, buf,
242 ptr + 22 - (unsigned char *)buf->head[0].iov_base)) 256 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
243 return GSS_S_DEFECTIVE_TOKEN; 257 return GSS_S_DEFECTIVE_TOKEN;
244 258
245 if (make_checksum("md5", ptr - 2, 8, buf, 259 if (make_checksum("md5", ptr, 8, buf,
246 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) 260 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
247 return GSS_S_FAILURE; 261 return GSS_S_FAILURE;
248 262
249 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, 263 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
250 md5cksum.data, md5cksum.len)) 264 md5cksum.data, md5cksum.len))
251 return GSS_S_FAILURE; 265 return GSS_S_FAILURE;
252 266
253 if (memcmp(md5cksum.data + 8, ptr + 14, 8)) 267 if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
254 return GSS_S_BAD_SIG; 268 return GSS_S_BAD_SIG;
255 269
256 /* it got through unscathed. Make sure the context is unexpired */ 270 /* it got through unscathed. Make sure the context is unexpired */
@@ -262,8 +276,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
262 276
263 /* do sequencing checks */ 277 /* do sequencing checks */
264 278
265 if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, 279 if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
266 &seqnum)) 280 &direction, &seqnum))
267 return GSS_S_BAD_SIG; 281 return GSS_S_BAD_SIG;
268 282
269 if ((kctx->initiate && direction != 0xff) || 283 if ((kctx->initiate && direction != 0xff) ||
@@ -274,7 +288,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
274 * better to copy and encrypt at the same time. */ 288 * better to copy and encrypt at the same time. */
275 289
276 blocksize = crypto_blkcipher_blocksize(kctx->enc); 290 blocksize = crypto_blkcipher_blocksize(kctx->enc);
277 data_start = ptr + 22 + blocksize; 291 data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
278 orig_start = buf->head[0].iov_base + offset; 292 orig_start = buf->head[0].iov_base + offset;
279 data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; 293 data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
280 memmove(orig_start, data_start, data_len); 294 memmove(orig_start, data_start, data_len);
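make_confounder() now seeds its 64-bit counter from random32() on first use and sizes its output by conflen instead of assuming an 8-byte cipher block, with the case 16 fall-through emitting two counter words. A userspace rendering of the scheme (demo only; the kernel version keeps the counter in a function-local static and BUG()s on other lengths):

	#include <stdint.h>
	#include <string.h>

	static void confounder_demo(unsigned char *p, uint32_t conflen,
				    uint64_t *counter)
	{
		uint64_t v;

		switch (conflen) {
		case 16:
			v = (*counter)++;
			memcpy(p, &v, 8);	/* first half */
			p += 8;
			/* fall through */
		case 8:
			v = (*counter)++;
			memcpy(p, &v, 8);	/* last (or only) block */
			break;
		}
	}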
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 01c7e311b904..5a32cb7c4bb4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/kthread.h>
21 22
22#include <linux/sunrpc/types.h> 23#include <linux/sunrpc/types.h>
23#include <linux/sunrpc/xdr.h> 24#include <linux/sunrpc/xdr.h>
@@ -291,15 +292,14 @@ svc_pool_map_put(void)
291 292
292 293
293/* 294/*
294 * Set the current thread's cpus_allowed mask so that it 295 * Set the given thread's cpus_allowed mask so that it
295 * will only run on cpus in the given pool. 296 * will only run on cpus in the given pool.
296 *
297 * Returns 1 and fills in oldmask iff a cpumask was applied.
298 */ 297 */
299static inline int 298static inline void
300svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) 299svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
301{ 300{
302 struct svc_pool_map *m = &svc_pool_map; 301 struct svc_pool_map *m = &svc_pool_map;
302 unsigned int node = m->pool_to[pidx];
303 303
304 /* 304 /*
305 * The caller checks for sv_nrpools > 1, which 305 * The caller checks for sv_nrpools > 1, which
@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
307 */ 307 */
308 BUG_ON(m->count == 0); 308 BUG_ON(m->count == 0);
309 309
310 switch (m->mode) 310 switch (m->mode) {
311 {
312 default:
313 return 0;
314 case SVC_POOL_PERCPU: 311 case SVC_POOL_PERCPU:
315 { 312 {
316 unsigned int cpu = m->pool_to[pidx]; 313 set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
317 314 break;
318 *oldmask = current->cpus_allowed;
319 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
320 return 1;
321 } 315 }
322 case SVC_POOL_PERNODE: 316 case SVC_POOL_PERNODE:
323 { 317 {
324 unsigned int node = m->pool_to[pidx];
325 node_to_cpumask_ptr(nodecpumask, node); 318 node_to_cpumask_ptr(nodecpumask, node);
326 319 set_cpus_allowed_ptr(task, nodecpumask);
327 *oldmask = current->cpus_allowed; 320 break;
328 set_cpus_allowed_ptr(current, nodecpumask);
329 return 1;
330 } 321 }
331 } 322 }
332} 323}
@@ -443,7 +434,7 @@ EXPORT_SYMBOL(svc_create);
443struct svc_serv * 434struct svc_serv *
444svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 435svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
445 void (*shutdown)(struct svc_serv *serv), 436 void (*shutdown)(struct svc_serv *serv),
446 svc_thread_fn func, int sig, struct module *mod) 437 svc_thread_fn func, struct module *mod)
447{ 438{
448 struct svc_serv *serv; 439 struct svc_serv *serv;
449 unsigned int npools = svc_pool_map_get(); 440 unsigned int npools = svc_pool_map_get();
@@ -452,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
452 443
453 if (serv != NULL) { 444 if (serv != NULL) {
454 serv->sv_function = func; 445 serv->sv_function = func;
455 serv->sv_kill_signal = sig;
456 serv->sv_module = mod; 446 serv->sv_module = mod;
457 } 447 }
458 448
@@ -461,7 +451,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
461EXPORT_SYMBOL(svc_create_pooled); 451EXPORT_SYMBOL(svc_create_pooled);
462 452
463/* 453/*
464 * Destroy an RPC service. Should be called with the BKL held 454 * Destroy an RPC service. Should be called with appropriate locking to
455 * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
465 */ 456 */
466void 457void
467svc_destroy(struct svc_serv *serv) 458svc_destroy(struct svc_serv *serv)
@@ -578,46 +569,6 @@ out_enomem:
578EXPORT_SYMBOL(svc_prepare_thread); 569EXPORT_SYMBOL(svc_prepare_thread);
579 570
580/* 571/*
581 * Create a thread in the given pool. Caller must hold BKL.
582 * On a NUMA or SMP machine, with a multi-pool serv, the thread
583 * will be restricted to run on the cpus belonging to the pool.
584 */
585static int
586__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
587 struct svc_pool *pool)
588{
589 struct svc_rqst *rqstp;
590 int error = -ENOMEM;
591 int have_oldmask = 0;
592 cpumask_t uninitialized_var(oldmask);
593
594 rqstp = svc_prepare_thread(serv, pool);
595 if (IS_ERR(rqstp)) {
596 error = PTR_ERR(rqstp);
597 goto out;
598 }
599
600 if (serv->sv_nrpools > 1)
601 have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
602
603 error = kernel_thread((int (*)(void *)) func, rqstp, 0);
604
605 if (have_oldmask)
606 set_cpus_allowed(current, oldmask);
607
608 if (error < 0)
609 goto out_thread;
610 svc_sock_update_bufs(serv);
611 error = 0;
612out:
613 return error;
614
615out_thread:
616 svc_exit_thread(rqstp);
617 goto out;
618}
619
620/*
621 * Choose a pool in which to create a new thread, for svc_set_num_threads 572 * Choose a pool in which to create a new thread, for svc_set_num_threads
622 */ 573 */
623static inline struct svc_pool * 574static inline struct svc_pool *
@@ -674,7 +625,7 @@ found_pool:
674 * of threads the given number. If `pool' is non-NULL, applies 625 * of threads the given number. If `pool' is non-NULL, applies
675 * only to threads in that pool, otherwise round-robins between 626 * only to threads in that pool, otherwise round-robins between
676 * all pools. Must be called with a svc_get() reference and 627 * all pools. Must be called with a svc_get() reference and
677 * the BKL held. 628 * the BKL or another lock to protect access to svc_serv fields.
678 * 629 *
679 * Destroying threads relies on the service threads filling in 630 * Destroying threads relies on the service threads filling in
680 * rqstp->rq_task, which only the nfs ones do. Assumes the serv 631 * rqstp->rq_task, which only the nfs ones do. Assumes the serv
@@ -686,7 +637,9 @@ found_pool:
686int 637int
687svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) 638svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
688{ 639{
689 struct task_struct *victim; 640 struct svc_rqst *rqstp;
641 struct task_struct *task;
642 struct svc_pool *chosen_pool;
690 int error = 0; 643 int error = 0;
691 unsigned int state = serv->sv_nrthreads-1; 644 unsigned int state = serv->sv_nrthreads-1;
692 645
@@ -702,18 +655,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
702 /* create new threads */ 655 /* create new threads */
703 while (nrservs > 0) { 656 while (nrservs > 0) {
704 nrservs--; 657 nrservs--;
658 chosen_pool = choose_pool(serv, pool, &state);
659
660 rqstp = svc_prepare_thread(serv, chosen_pool);
661 if (IS_ERR(rqstp)) {
662 error = PTR_ERR(rqstp);
663 break;
664 }
665
705 __module_get(serv->sv_module); 666 __module_get(serv->sv_module);
706 error = __svc_create_thread(serv->sv_function, serv, 667 task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
707 choose_pool(serv, pool, &state)); 668 if (IS_ERR(task)) {
708 if (error < 0) { 669 error = PTR_ERR(task);
709 module_put(serv->sv_module); 670 module_put(serv->sv_module);
671 svc_exit_thread(rqstp);
710 break; 672 break;
711 } 673 }
674
675 rqstp->rq_task = task;
676 if (serv->sv_nrpools > 1)
677 svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
678
679 svc_sock_update_bufs(serv);
680 wake_up_process(task);
712 } 681 }
713 /* destroy old threads */ 682 /* destroy old threads */
714 while (nrservs < 0 && 683 while (nrservs < 0 &&
715 (victim = choose_victim(serv, pool, &state)) != NULL) { 684 (task = choose_victim(serv, pool, &state)) != NULL) {
716 send_sig(serv->sv_kill_signal, victim, 1); 685 send_sig(SIGINT, task, 1);
717 nrservs++; 686 nrservs++;
718 } 687 }
719 688
@@ -722,7 +691,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
722EXPORT_SYMBOL(svc_set_num_threads); 691EXPORT_SYMBOL(svc_set_num_threads);
723 692
724/* 693/*
725 * Called from a server thread as it's exiting. Caller must hold BKL. 694 * Called from a server thread as it's exiting. Caller must hold the BKL or
695 * the "service mutex", whichever is appropriate for the service.
726 */ 696 */
727void 697void
728svc_exit_thread(struct svc_rqst *rqstp) 698svc_exit_thread(struct svc_rqst *rqstp)
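The thread-management rework replaces kernel_thread() plus a per-service kill signal with the kthread API: svc_set_num_threads() now allocates the rqstp, creates the task stopped, binds its cpumask, and only then wakes it, while teardown always sends SIGINT. Service threads accordingly take the kthread-shaped int (*)(void *) prototype; a sketch of what one looks like after this series (modeled loosely on the nfsd conversion, details abbreviated):

	static int my_svc_thread(void *vrqstp)
	{
		struct svc_rqst *rqstp = vrqstp;
		int err;

		allow_signal(SIGINT);	/* svc_set_num_threads() stops us with SIGINT */
		for (;;) {
			err = svc_recv(rqstp, 60 * 60 * HZ);
			if (err == -EAGAIN)
				continue;	/* timed out, wait again */
			if (err < 0)
				break;		/* -EINTR: asked to exit */
			svc_process(rqstp);
		}
		svc_exit_thread(rqstp);	/* under the locking rules noted above */
		return 0;
	}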
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 88c0ca20bb1e..87101177825b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod;
69atomic_t rdma_stat_sq_poll; 69atomic_t rdma_stat_sq_poll;
70atomic_t rdma_stat_sq_prod; 70atomic_t rdma_stat_sq_prod;
71 71
72/* Temporary NFS request map and context caches */
73struct kmem_cache *svc_rdma_map_cachep;
74struct kmem_cache *svc_rdma_ctxt_cachep;
75
72/* 76/*
73 * This function implements reading and resetting an atomic_t stat 77 * This function implements reading and resetting an atomic_t stat
74 * variable through read/write to a proc file. Any write to the file 78 * variable through read/write to a proc file. Any write to the file
@@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = {
236void svc_rdma_cleanup(void) 240void svc_rdma_cleanup(void)
237{ 241{
238 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 242 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
243 flush_scheduled_work();
239 if (svcrdma_table_header) { 244 if (svcrdma_table_header) {
240 unregister_sysctl_table(svcrdma_table_header); 245 unregister_sysctl_table(svcrdma_table_header);
241 svcrdma_table_header = NULL; 246 svcrdma_table_header = NULL;
242 } 247 }
243 svc_unreg_xprt_class(&svc_rdma_class); 248 svc_unreg_xprt_class(&svc_rdma_class);
249 kmem_cache_destroy(svc_rdma_map_cachep);
250 kmem_cache_destroy(svc_rdma_ctxt_cachep);
244} 251}
245 252
246int svc_rdma_init(void) 253int svc_rdma_init(void)
@@ -255,9 +262,37 @@ int svc_rdma_init(void)
255 svcrdma_table_header = 262 svcrdma_table_header =
256 register_sysctl_table(svcrdma_root_table); 263 register_sysctl_table(svcrdma_root_table);
257 264
265 /* Create the temporary map cache */
266 svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
267 sizeof(struct svc_rdma_req_map),
268 0,
269 SLAB_HWCACHE_ALIGN,
270 NULL);
271 if (!svc_rdma_map_cachep) {
272 printk(KERN_INFO "Could not allocate map cache.\n");
273 goto err0;
274 }
275
276 /* Create the temporary context cache */
277 svc_rdma_ctxt_cachep =
278 kmem_cache_create("svc_rdma_ctxt_cache",
279 sizeof(struct svc_rdma_op_ctxt),
280 0,
281 SLAB_HWCACHE_ALIGN,
282 NULL);
283 if (!svc_rdma_ctxt_cachep) {
284 printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
285 goto err1;
286 }
287
258 /* Register RDMA with the SVC transport switch */ 288 /* Register RDMA with the SVC transport switch */
259 svc_reg_xprt_class(&svc_rdma_class); 289 svc_reg_xprt_class(&svc_rdma_class);
260 return 0; 290 return 0;
291 err1:
292 kmem_cache_destroy(svc_rdma_map_cachep);
293 err0:
294 unregister_sysctl_table(svcrdma_table_header);
295 return -ENOMEM;
261} 296}
262MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
263MODULE_DESCRIPTION("SVC RDMA Transport"); 298MODULE_DESCRIPTION("SVC RDMA Transport");
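svc_rdma_init() now backs the request maps and WR contexts with dedicated kmem caches and unwinds in reverse order on failure. The get/put helpers declared in svc_rdma.h are then thin wrappers over these caches; their assumed shape (the real bodies live in svc_rdma_transport.c and retry on allocation failure):

	struct svc_rdma_req_map *svc_rdma_get_req_map(void)
	{
		struct svc_rdma_req_map *map;

		map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
		/* real code sleeps and retries instead of returning NULL */
		map->count = 0;
		return map;
	}

	void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
	{
		kmem_cache_free(svc_rdma_map_cachep, map);
	}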
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 06ab4841537b..b4b17f44cb29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
112 rqstp->rq_arg.tail[0].iov_len = 0; 112 rqstp->rq_arg.tail[0].iov_len = 0;
113} 113}
114 114
115struct chunk_sge {
116 int start; /* sge no for this chunk */
117 int count; /* sge count for this chunk */
118};
119
120/* Encode a read-chunk-list as an array of IB SGE 115/* Encode a read-chunk-list as an array of IB SGE
121 * 116 *
122 * Assumptions: 117 * Assumptions:
@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
134 struct svc_rqst *rqstp, 129 struct svc_rqst *rqstp,
135 struct svc_rdma_op_ctxt *head, 130 struct svc_rdma_op_ctxt *head,
136 struct rpcrdma_msg *rmsgp, 131 struct rpcrdma_msg *rmsgp,
137 struct ib_sge *sge, 132 struct svc_rdma_req_map *rpl_map,
138 struct chunk_sge *ch_sge_ary, 133 struct svc_rdma_req_map *chl_map,
139 int ch_count, 134 int ch_count,
140 int byte_count) 135 int byte_count)
141{ 136{
@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
156 head->arg.head[0] = rqstp->rq_arg.head[0]; 151 head->arg.head[0] = rqstp->rq_arg.head[0];
157 head->arg.tail[0] = rqstp->rq_arg.tail[0]; 152 head->arg.tail[0] = rqstp->rq_arg.tail[0];
158 head->arg.pages = &head->pages[head->count]; 153 head->arg.pages = &head->pages[head->count];
159 head->sge[0].length = head->count; /* save count of hdr pages */ 154 head->hdr_count = head->count; /* save count of hdr pages */
160 head->arg.page_base = 0; 155 head->arg.page_base = 0;
161 head->arg.page_len = ch_bytes; 156 head->arg.page_len = ch_bytes;
162 head->arg.len = rqstp->rq_arg.len + ch_bytes; 157 head->arg.len = rqstp->rq_arg.len + ch_bytes;
163 head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; 158 head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
164 head->count++; 159 head->count++;
165 ch_sge_ary[0].start = 0; 160 chl_map->ch[0].start = 0;
166 while (byte_count) { 161 while (byte_count) {
162 rpl_map->sge[sge_no].iov_base =
163 page_address(rqstp->rq_arg.pages[page_no]) + page_off;
167 sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); 164 sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
168 sge[sge_no].addr = 165 rpl_map->sge[sge_no].iov_len = sge_bytes;
169 ib_dma_map_page(xprt->sc_cm_id->device,
170 rqstp->rq_arg.pages[page_no],
171 page_off, sge_bytes,
172 DMA_FROM_DEVICE);
173 sge[sge_no].length = sge_bytes;
174 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
175 /* 166 /*
176 * Don't bump head->count here because the same page 167 * Don't bump head->count here because the same page
177 * may be used by multiple SGE. 168 * may be used by multiple SGE.
@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
187 * SGE, move to the next SGE 178 * SGE, move to the next SGE
188 */ 179 */
189 if (ch_bytes == 0) { 180 if (ch_bytes == 0) {
190 ch_sge_ary[ch_no].count = 181 chl_map->ch[ch_no].count =
191 sge_no - ch_sge_ary[ch_no].start; 182 sge_no - chl_map->ch[ch_no].start;
192 ch_no++; 183 ch_no++;
193 ch++; 184 ch++;
194 ch_sge_ary[ch_no].start = sge_no; 185 chl_map->ch[ch_no].start = sge_no;
195 ch_bytes = ch->rc_target.rs_length; 186 ch_bytes = ch->rc_target.rs_length;
196 /* If bytes remaining account for next chunk */ 187 /* If bytes remaining account for next chunk */
197 if (byte_count) { 188 if (byte_count) {
@@ -220,18 +211,25 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
220 return sge_no; 211 return sge_no;
221} 212}
222 213
223static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, 214static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
224 struct ib_sge *sge, 215 struct svc_rdma_op_ctxt *ctxt,
216 struct kvec *vec,
225 u64 *sgl_offset, 217 u64 *sgl_offset,
226 int count) 218 int count)
227{ 219{
228 int i; 220 int i;
229 221
230 ctxt->count = count; 222 ctxt->count = count;
223 ctxt->direction = DMA_FROM_DEVICE;
231 for (i = 0; i < count; i++) { 224 for (i = 0; i < count; i++) {
232 ctxt->sge[i].addr = sge[i].addr; 225 atomic_inc(&xprt->sc_dma_used);
233 ctxt->sge[i].length = sge[i].length; 226 ctxt->sge[i].addr =
234 *sgl_offset = *sgl_offset + sge[i].length; 227 ib_dma_map_single(xprt->sc_cm_id->device,
228 vec[i].iov_base, vec[i].iov_len,
229 DMA_FROM_DEVICE);
230 ctxt->sge[i].length = vec[i].iov_len;
231 ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
232 *sgl_offset = *sgl_offset + vec[i].iov_len;
235 } 233 }
236} 234}
237 235
@@ -282,34 +280,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
282 struct ib_send_wr read_wr; 280 struct ib_send_wr read_wr;
283 int err = 0; 281 int err = 0;
284 int ch_no; 282 int ch_no;
285 struct ib_sge *sge;
286 int ch_count; 283 int ch_count;
287 int byte_count; 284 int byte_count;
288 int sge_count; 285 int sge_count;
289 u64 sgl_offset; 286 u64 sgl_offset;
290 struct rpcrdma_read_chunk *ch; 287 struct rpcrdma_read_chunk *ch;
291 struct svc_rdma_op_ctxt *ctxt = NULL; 288 struct svc_rdma_op_ctxt *ctxt = NULL;
292 struct svc_rdma_op_ctxt *tmp_sge_ctxt; 289 struct svc_rdma_req_map *rpl_map;
293 struct svc_rdma_op_ctxt *tmp_ch_ctxt; 290 struct svc_rdma_req_map *chl_map;
294 struct chunk_sge *ch_sge_ary;
295 291
296 /* If no read list is present, return 0 */ 292 /* If no read list is present, return 0 */
297 ch = svc_rdma_get_read_chunk(rmsgp); 293 ch = svc_rdma_get_read_chunk(rmsgp);
298 if (!ch) 294 if (!ch)
299 return 0; 295 return 0;
300 296
301 /* Allocate temporary contexts to keep SGE */ 297 /* Allocate temporary reply and chunk maps */
302 BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge)); 298 rpl_map = svc_rdma_get_req_map();
303 tmp_sge_ctxt = svc_rdma_get_context(xprt); 299 chl_map = svc_rdma_get_req_map();
304 sge = tmp_sge_ctxt->sge;
305 tmp_ch_ctxt = svc_rdma_get_context(xprt);
306 ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
307 300
308 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); 301 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
309 if (ch_count > RPCSVC_MAXPAGES) 302 if (ch_count > RPCSVC_MAXPAGES)
310 return -EINVAL; 303 return -EINVAL;
311 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, 304 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
312 sge, ch_sge_ary, 305 rpl_map, chl_map,
313 ch_count, byte_count); 306 ch_count, byte_count);
314 sgl_offset = 0; 307 sgl_offset = 0;
315 ch_no = 0; 308 ch_no = 0;
@@ -331,14 +324,15 @@ next_sge:
331 read_wr.wr.rdma.remote_addr = 324 read_wr.wr.rdma.remote_addr =
332 get_unaligned(&(ch->rc_target.rs_offset)) + 325 get_unaligned(&(ch->rc_target.rs_offset)) +
333 sgl_offset; 326 sgl_offset;
334 read_wr.sg_list = &sge[ch_sge_ary[ch_no].start]; 327 read_wr.sg_list = ctxt->sge;
335 read_wr.num_sge = 328 read_wr.num_sge =
336 rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count); 329 rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
337 rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], 330 rdma_set_ctxt_sge(xprt, ctxt,
331 &rpl_map->sge[chl_map->ch[ch_no].start],
338 &sgl_offset, 332 &sgl_offset,
339 read_wr.num_sge); 333 read_wr.num_sge);
340 if (((ch+1)->rc_discrim == 0) && 334 if (((ch+1)->rc_discrim == 0) &&
341 (read_wr.num_sge == ch_sge_ary[ch_no].count)) { 335 (read_wr.num_sge == chl_map->ch[ch_no].count)) {
342 /* 336 /*
343 * Mark the last RDMA_READ with a bit to 337 * Mark the last RDMA_READ with a bit to
344 * indicate all RPC data has been fetched from 338 * indicate all RPC data has been fetched from
@@ -358,9 +352,9 @@ next_sge:
358 } 352 }
359 atomic_inc(&rdma_stat_read); 353 atomic_inc(&rdma_stat_read);
360 354
361 if (read_wr.num_sge < ch_sge_ary[ch_no].count) { 355 if (read_wr.num_sge < chl_map->ch[ch_no].count) {
362 ch_sge_ary[ch_no].count -= read_wr.num_sge; 356 chl_map->ch[ch_no].count -= read_wr.num_sge;
363 ch_sge_ary[ch_no].start += read_wr.num_sge; 357 chl_map->ch[ch_no].start += read_wr.num_sge;
364 goto next_sge; 358 goto next_sge;
365 } 359 }
366 sgl_offset = 0; 360 sgl_offset = 0;
@@ -368,8 +362,8 @@ next_sge:
368 } 362 }
369 363
370 out: 364 out:
371 svc_rdma_put_context(tmp_sge_ctxt, 0); 365 svc_rdma_put_req_map(rpl_map);
372 svc_rdma_put_context(tmp_ch_ctxt, 0); 366 svc_rdma_put_req_map(chl_map);
373 367
374 /* Detach arg pages. svc_recv will replenish them */ 368 /* Detach arg pages. svc_recv will replenish them */
375 for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) 369 for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
@@ -399,7 +393,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
399 rqstp->rq_pages[page_no] = head->pages[page_no]; 393 rqstp->rq_pages[page_no] = head->pages[page_no];
400 } 394 }
401 /* Point rq_arg.pages past header */ 395 /* Point rq_arg.pages past header */
402 rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length]; 396 rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
403 rqstp->rq_arg.page_len = head->arg.page_len; 397 rqstp->rq_arg.page_len = head->arg.page_len;
404 rqstp->rq_arg.page_base = head->arg.page_base; 398 rqstp->rq_arg.page_base = head->arg.page_base;
405 399
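The last hunk above also fixes a unit bug: the old code advanced rq_arg.pages by head->sge[0].length, a byte count, where a page count belongs; head->hdr_count is the number of pages consumed by the RPCRDMA header. A minimal sketch of the distinction (all names illustrative):

    #include <stdio.h>

    int main(void)
    {
        const char *pages[4] = { "hdr", "p0", "p1", "p2" };
        int hdr_count = 1;    /* pages consumed by the RPCRDMA header */
        int hdr_bytes = 4096; /* sge[0].length: bytes, not a page index */
        const char **arg_pages;

        /* Correct: skip exactly the header pages. */
        arg_pages = &pages[hdr_count];
        /* Pre-patch shape: &pages[hdr_bytes] would index thousands of
         * entries past the array, because a byte length was used where
         * a page count belongs. */
        (void)hdr_bytes;

        printf("first arg page: %s\n", arg_pages[0]); /* "p0" */
        return 0;
    }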
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index fb82b1b683f8..a19b22b452a3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -63,52 +63,44 @@
63 * SGE[2..sge_count-2] data from xdr->pages[] 63 * SGE[2..sge_count-2] data from xdr->pages[]
64 * SGE[sge_count-1] data from xdr->tail. 64 * SGE[sge_count-1] data from xdr->tail.
65 * 65 *
66 * The max SGE we need is the length of the XDR / pagesize + one for
67 * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
68 * reserves a page for both the request and the reply header, and this
 69 * array is only concerned with the reply, we are assured that we have
 70 * one extra page for the RPCRDMA header.
66 */ 71 */
67static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, 72static void xdr_to_sge(struct svcxprt_rdma *xprt,
68 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
69 struct ib_sge *sge, 74 struct svc_rdma_req_map *vec)
70 int *sge_count)
71{ 75{
72 /* Max we need is the length of the XDR / pagesize + one for
73 * head + one for tail + one for RPCRDMA header
74 */
75 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; 76 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
76 int sge_no; 77 int sge_no;
77 u32 byte_count = xdr->len;
78 u32 sge_bytes; 78 u32 sge_bytes;
79 u32 page_bytes; 79 u32 page_bytes;
80 int page_off; 80 u32 page_off;
81 int page_no; 81 int page_no;
82 82
83 BUG_ON(xdr->len !=
84 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
85
83 /* Skip the first sge, this is for the RPCRDMA header */ 86 /* Skip the first sge, this is for the RPCRDMA header */
84 sge_no = 1; 87 sge_no = 1;
85 88
86 /* Head SGE */ 89 /* Head SGE */
87 sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, 90 vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
88 xdr->head[0].iov_base, 91 vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
89 xdr->head[0].iov_len,
90 DMA_TO_DEVICE);
91 sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
92 byte_count -= sge_bytes;
93 sge[sge_no].length = sge_bytes;
94 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
95 sge_no++; 92 sge_no++;
96 93
97 /* pages SGE */ 94 /* pages SGE */
98 page_no = 0; 95 page_no = 0;
99 page_bytes = xdr->page_len; 96 page_bytes = xdr->page_len;
100 page_off = xdr->page_base; 97 page_off = xdr->page_base;
101 while (byte_count && page_bytes) { 98 while (page_bytes) {
102 sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); 99 vec->sge[sge_no].iov_base =
103 sge[sge_no].addr = 100 page_address(xdr->pages[page_no]) + page_off;
104 ib_dma_map_page(xprt->sc_cm_id->device, 101 sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
105 xdr->pages[page_no], page_off,
106 sge_bytes, DMA_TO_DEVICE);
107 sge_bytes = min(sge_bytes, page_bytes);
108 byte_count -= sge_bytes;
109 page_bytes -= sge_bytes; 102 page_bytes -= sge_bytes;
110 sge[sge_no].length = sge_bytes; 103 vec->sge[sge_no].iov_len = sge_bytes;
111 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
112 104
113 sge_no++; 105 sge_no++;
114 page_no++; 106 page_no++;
@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
116 } 108 }
117 109
118 /* Tail SGE */ 110 /* Tail SGE */
119 if (byte_count && xdr->tail[0].iov_len) { 111 if (xdr->tail[0].iov_len) {
120 sge[sge_no].addr = 112 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
121 ib_dma_map_single(xprt->sc_cm_id->device, 113 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
122 xdr->tail[0].iov_base,
123 xdr->tail[0].iov_len,
124 DMA_TO_DEVICE);
125 sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
126 byte_count -= sge_bytes;
127 sge[sge_no].length = sge_bytes;
128 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
129 sge_no++; 114 sge_no++;
130 } 115 }
131 116
132 BUG_ON(sge_no > sge_max); 117 BUG_ON(sge_no > sge_max);
133 BUG_ON(byte_count != 0); 118 vec->count = sge_no;
134
135 *sge_count = sge_no;
136 return sge;
137} 119}
138 120
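After this patch, xdr_to_sge() no longer DMA-maps anything; it only records head, page fragments, and tail as iovecs in a request map, reserving slot 0 for the RPCRDMA header and bounding the entry count by len / PAGE_SIZE plus three. A compilable user-space sketch of that walk, assuming a simple char-array page model (xdr_like, req_map, and xdr_to_vec are illustrative names):

    #include <assert.h>
    #include <stddef.h>
    #include <sys/uio.h>

    #define PAGE_SIZE 4096
    #define MAX_SGE 64

    struct req_map { struct iovec sge[MAX_SGE]; unsigned long count; };

    struct xdr_like {
        struct iovec head, tail;
        char **pages;
        size_t page_base, page_len, len;
    };

    /* Shape of xdr_to_sge() after this patch: slot 0 stays reserved for
     * the RPCRDMA header; head, page fragments, and tail follow. */
    static void xdr_to_vec(const struct xdr_like *xdr, struct req_map *vec)
    {
        int sge_max = (int)((xdr->len + PAGE_SIZE - 1) / PAGE_SIZE) + 3;
        int sge_no = 1, page_no = 0;
        size_t page_bytes = xdr->page_len, page_off = xdr->page_base;

        vec->sge[sge_no++] = xdr->head;
        while (page_bytes) {
            size_t n = page_bytes < PAGE_SIZE - page_off ?
                       page_bytes : PAGE_SIZE - page_off;

            vec->sge[sge_no].iov_base = xdr->pages[page_no] + page_off;
            vec->sge[sge_no].iov_len = n;
            page_bytes -= n;
            sge_no++;
            page_no++;
            page_off = 0;
        }
        if (xdr->tail.iov_len)
            vec->sge[sge_no++] = xdr->tail;
        assert(sge_no <= sge_max);
        vec->count = sge_no;
    }

    int main(void)
    {
        static char p0[PAGE_SIZE];
        char *pages[1] = { p0 };
        char head[16], tail[8];
        struct xdr_like x = {
            { head, sizeof(head) }, { tail, sizeof(tail) },
            pages, 0, 100, 124
        };
        struct req_map v;

        xdr_to_vec(&x, &v);
        return v.count == 4 ? 0 : 1; /* slot 0 + head + 1 fragment + tail */
    }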
139
140/* Assumptions: 121/* Assumptions:
141 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 122 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
142 */ 123 */
143static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, 124static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
144 u32 rmr, u64 to, 125 u32 rmr, u64 to,
145 u32 xdr_off, int write_len, 126 u32 xdr_off, int write_len,
146 struct ib_sge *xdr_sge, int sge_count) 127 struct svc_rdma_req_map *vec)
147{ 128{
148 struct svc_rdma_op_ctxt *tmp_sge_ctxt;
149 struct ib_send_wr write_wr; 129 struct ib_send_wr write_wr;
150 struct ib_sge *sge; 130 struct ib_sge *sge;
151 int xdr_sge_no; 131 int xdr_sge_no;
@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
154 int sge_off; 134 int sge_off;
155 int bc; 135 int bc;
156 struct svc_rdma_op_ctxt *ctxt; 136 struct svc_rdma_op_ctxt *ctxt;
157 int ret = 0;
158 137
159 BUG_ON(sge_count > RPCSVC_MAXPAGES); 138 BUG_ON(vec->count > RPCSVC_MAXPAGES);
160 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " 139 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
161 "write_len=%d, xdr_sge=%p, sge_count=%d\n", 140 "write_len=%d, vec->sge=%p, vec->count=%lu\n",
162 rmr, (unsigned long long)to, xdr_off, 141 rmr, (unsigned long long)to, xdr_off,
163 write_len, xdr_sge, sge_count); 142 write_len, vec->sge, vec->count);
164 143
165 ctxt = svc_rdma_get_context(xprt); 144 ctxt = svc_rdma_get_context(xprt);
166 ctxt->count = 0; 145 ctxt->direction = DMA_TO_DEVICE;
167 tmp_sge_ctxt = svc_rdma_get_context(xprt); 146 sge = ctxt->sge;
168 sge = tmp_sge_ctxt->sge;
169 147
170 /* Find the SGE associated with xdr_off */ 148 /* Find the SGE associated with xdr_off */
171 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; 149 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
172 xdr_sge_no++) { 150 xdr_sge_no++) {
173 if (xdr_sge[xdr_sge_no].length > bc) 151 if (vec->sge[xdr_sge_no].iov_len > bc)
174 break; 152 break;
175 bc -= xdr_sge[xdr_sge_no].length; 153 bc -= vec->sge[xdr_sge_no].iov_len;
176 } 154 }
177 155
178 sge_off = bc; 156 sge_off = bc;
@@ -180,21 +158,28 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
180 sge_no = 0; 158 sge_no = 0;
181 159
182 /* Copy the remaining SGE */ 160 /* Copy the remaining SGE */
183 while (bc != 0 && xdr_sge_no < sge_count) { 161 while (bc != 0 && xdr_sge_no < vec->count) {
184 sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; 162 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
185 sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
186 sge_bytes = min((size_t)bc, 163 sge_bytes = min((size_t)bc,
187 (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); 164 (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
188 sge[sge_no].length = sge_bytes; 165 sge[sge_no].length = sge_bytes;
189 166 atomic_inc(&xprt->sc_dma_used);
167 sge[sge_no].addr =
168 ib_dma_map_single(xprt->sc_cm_id->device,
169 (void *)
170 vec->sge[xdr_sge_no].iov_base + sge_off,
171 sge_bytes, DMA_TO_DEVICE);
172 if (dma_mapping_error(sge[sge_no].addr))
173 goto err;
190 sge_off = 0; 174 sge_off = 0;
191 sge_no++; 175 sge_no++;
176 ctxt->count++;
192 xdr_sge_no++; 177 xdr_sge_no++;
193 bc -= sge_bytes; 178 bc -= sge_bytes;
194 } 179 }
195 180
196 BUG_ON(bc != 0); 181 BUG_ON(bc != 0);
197 BUG_ON(xdr_sge_no > sge_count); 182 BUG_ON(xdr_sge_no > vec->count);
198 183
199 /* Prepare WRITE WR */ 184 /* Prepare WRITE WR */
200 memset(&write_wr, 0, sizeof write_wr); 185 memset(&write_wr, 0, sizeof write_wr);
@@ -209,21 +194,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
209 194
210 /* Post It */ 195 /* Post It */
211 atomic_inc(&rdma_stat_write); 196 atomic_inc(&rdma_stat_write);
212 if (svc_rdma_send(xprt, &write_wr)) { 197 if (svc_rdma_send(xprt, &write_wr))
213 svc_rdma_put_context(ctxt, 1); 198 goto err;
214 /* Fatal error, close transport */ 199 return 0;
215 ret = -EIO; 200 err:
216 } 201 svc_rdma_put_context(ctxt, 0);
217 svc_rdma_put_context(tmp_sge_ctxt, 0); 202 /* Fatal error, close transport */
218 return ret; 203 return -EIO;
219} 204}
220 205
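send_write() now locates the iovec that contains a given XDR byte offset by walking the vector (slot 0, the RPCRDMA header, is skipped) and carrying the remainder into the first SGE. A small stand-alone sketch of that scan (find_sge() is an illustrative name):

    #include <stdio.h>
    #include <sys/uio.h>

    /* Find the iovec containing byte offset `off`, skipping slot 0.
     * Returns the iovec index; the offset within it lands in *rem. */
    static int find_sge(const struct iovec *sge, int count,
                        size_t off, size_t *rem)
    {
        int i;
        size_t bc = off;

        for (i = 1; bc && i < count; i++) {
            if (sge[i].iov_len > bc)
                break;
            bc -= sge[i].iov_len;
        }
        *rem = bc;
        return i;
    }

    int main(void)
    {
        struct iovec v[4] = { { 0, 0 }, { 0, 10 }, { 0, 20 }, { 0, 30 } };
        size_t rem;
        int idx = find_sge(v, 4, 25, &rem);

        printf("idx=%d rem=%zu\n", idx, rem); /* idx=2 rem=15 */
        return 0;
    }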
221static int send_write_chunks(struct svcxprt_rdma *xprt, 206static int send_write_chunks(struct svcxprt_rdma *xprt,
222 struct rpcrdma_msg *rdma_argp, 207 struct rpcrdma_msg *rdma_argp,
223 struct rpcrdma_msg *rdma_resp, 208 struct rpcrdma_msg *rdma_resp,
224 struct svc_rqst *rqstp, 209 struct svc_rqst *rqstp,
225 struct ib_sge *sge, 210 struct svc_rdma_req_map *vec)
226 int sge_count)
227{ 211{
228 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 212 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
229 int write_len; 213 int write_len;
@@ -269,8 +253,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
269 rs_offset + chunk_off, 253 rs_offset + chunk_off,
270 xdr_off, 254 xdr_off,
271 this_write, 255 this_write,
272 sge, 256 vec);
273 sge_count);
274 if (ret) { 257 if (ret) {
275 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 258 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
276 ret); 259 ret);
@@ -292,8 +275,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
292 struct rpcrdma_msg *rdma_argp, 275 struct rpcrdma_msg *rdma_argp,
293 struct rpcrdma_msg *rdma_resp, 276 struct rpcrdma_msg *rdma_resp,
294 struct svc_rqst *rqstp, 277 struct svc_rqst *rqstp,
295 struct ib_sge *sge, 278 struct svc_rdma_req_map *vec)
296 int sge_count)
297{ 279{
298 u32 xfer_len = rqstp->rq_res.len; 280 u32 xfer_len = rqstp->rq_res.len;
299 int write_len; 281 int write_len;
@@ -341,8 +323,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
341 rs_offset + chunk_off, 323 rs_offset + chunk_off,
342 xdr_off, 324 xdr_off,
343 this_write, 325 this_write,
344 sge, 326 vec);
345 sge_count);
346 if (ret) { 327 if (ret) {
347 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 328 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
348 ret); 329 ret);
@@ -380,7 +361,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
380 struct page *page, 361 struct page *page,
381 struct rpcrdma_msg *rdma_resp, 362 struct rpcrdma_msg *rdma_resp,
382 struct svc_rdma_op_ctxt *ctxt, 363 struct svc_rdma_op_ctxt *ctxt,
383 int sge_count, 364 struct svc_rdma_req_map *vec,
384 int byte_count) 365 int byte_count)
385{ 366{
386 struct ib_send_wr send_wr; 367 struct ib_send_wr send_wr;
@@ -405,6 +386,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
405 ctxt->count = 1; 386 ctxt->count = 1;
406 387
407 /* Prepare the SGE for the RPCRDMA Header */ 388 /* Prepare the SGE for the RPCRDMA Header */
389 atomic_inc(&rdma->sc_dma_used);
408 ctxt->sge[0].addr = 390 ctxt->sge[0].addr =
409 ib_dma_map_page(rdma->sc_cm_id->device, 391 ib_dma_map_page(rdma->sc_cm_id->device,
410 page, 0, PAGE_SIZE, DMA_TO_DEVICE); 392 page, 0, PAGE_SIZE, DMA_TO_DEVICE);
@@ -413,10 +395,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
413 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; 395 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
414 396
415 /* Determine how many of our SGE are to be transmitted */ 397 /* Determine how many of our SGE are to be transmitted */
416 for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { 398 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
417 sge_bytes = min((size_t)ctxt->sge[sge_no].length, 399 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
418 (size_t)byte_count);
419 byte_count -= sge_bytes; 400 byte_count -= sge_bytes;
401 atomic_inc(&rdma->sc_dma_used);
402 ctxt->sge[sge_no].addr =
403 ib_dma_map_single(rdma->sc_cm_id->device,
404 vec->sge[sge_no].iov_base,
405 sge_bytes, DMA_TO_DEVICE);
406 ctxt->sge[sge_no].length = sge_bytes;
407 ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
420 } 408 }
421 BUG_ON(byte_count != 0); 409 BUG_ON(byte_count != 0);
422 410
@@ -428,8 +416,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
428 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 416 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
429 ctxt->count++; 417 ctxt->count++;
430 rqstp->rq_respages[page_no] = NULL; 418 rqstp->rq_respages[page_no] = NULL;
419 /* If there are more pages than SGE, terminate SGE list */
420 if (page_no+1 >= sge_no)
421 ctxt->sge[page_no+1].length = 0;
431 } 422 }
432
433 BUG_ON(sge_no > rdma->sc_max_sge); 423 BUG_ON(sge_no > rdma->sc_max_sge);
434 memset(&send_wr, 0, sizeof send_wr); 424 memset(&send_wr, 0, sizeof send_wr);
435 ctxt->wr_op = IB_WR_SEND; 425 ctxt->wr_op = IB_WR_SEND;
@@ -473,20 +463,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
473 enum rpcrdma_proc reply_type; 463 enum rpcrdma_proc reply_type;
474 int ret; 464 int ret;
475 int inline_bytes; 465 int inline_bytes;
476 struct ib_sge *sge;
477 int sge_count = 0;
478 struct page *res_page; 466 struct page *res_page;
479 struct svc_rdma_op_ctxt *ctxt; 467 struct svc_rdma_op_ctxt *ctxt;
468 struct svc_rdma_req_map *vec;
480 469
481 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); 470 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
482 471
483 /* Get the RDMA request header. */ 472 /* Get the RDMA request header. */
484 rdma_argp = xdr_start(&rqstp->rq_arg); 473 rdma_argp = xdr_start(&rqstp->rq_arg);
485 474
 486 /* Build an SGE for the XDR */ 475 /* Build a req vec for the XDR */
487 ctxt = svc_rdma_get_context(rdma); 476 ctxt = svc_rdma_get_context(rdma);
488 ctxt->direction = DMA_TO_DEVICE; 477 ctxt->direction = DMA_TO_DEVICE;
489 sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); 478 vec = svc_rdma_get_req_map();
479 xdr_to_sge(rdma, &rqstp->rq_res, vec);
490 480
491 inline_bytes = rqstp->rq_res.len; 481 inline_bytes = rqstp->rq_res.len;
492 482
@@ -503,7 +493,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
503 493
504 /* Send any write-chunk data and build resp write-list */ 494 /* Send any write-chunk data and build resp write-list */
505 ret = send_write_chunks(rdma, rdma_argp, rdma_resp, 495 ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
506 rqstp, sge, sge_count); 496 rqstp, vec);
507 if (ret < 0) { 497 if (ret < 0) {
508 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 498 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
509 ret); 499 ret);
@@ -513,7 +503,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
513 503
514 /* Send any reply-list data and update resp reply-list */ 504 /* Send any reply-list data and update resp reply-list */
515 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, 505 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
516 rqstp, sge, sge_count); 506 rqstp, vec);
517 if (ret < 0) { 507 if (ret < 0) {
518 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 508 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
519 ret); 509 ret);
@@ -521,11 +511,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
521 } 511 }
522 inline_bytes -= ret; 512 inline_bytes -= ret;
523 513
524 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, 514 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
525 inline_bytes); 515 inline_bytes);
516 svc_rdma_put_req_map(vec);
526 dprintk("svcrdma: send_reply returns %d\n", ret); 517 dprintk("svcrdma: send_reply returns %d\n", ret);
527 return ret; 518 return ret;
528 error: 519 error:
520 svc_rdma_put_req_map(vec);
529 svc_rdma_put_context(ctxt, 0); 521 svc_rdma_put_context(ctxt, 0);
530 put_page(res_page); 522 put_page(res_page);
531 return ret; 523 return ret;
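Note that svc_rdma_sendto() now pairs svc_rdma_get_req_map() with svc_rdma_put_req_map() on both the success path and the error path. Sketched in isolation (get_map(), put_map(), and do_send() are placeholders):

    #include <stdlib.h>

    struct map { int count; };
    static struct map *get_map(void) { return calloc(1, sizeof(struct map)); }
    static void put_map(struct map *m) { free(m); }
    static int do_send(struct map *m) { (void)m; return 0; /* pretend success */ }

    /* The map is returned before every exit, mirroring how
     * svc_rdma_sendto() releases the req map in this patch. */
    static int sendto_like(void)
    {
        struct map *vec = get_map();
        int ret;

        if (!vec)
            return -1;
        ret = do_send(vec);
        if (ret < 0)
            goto error;
        put_map(vec);
        return ret;
    error:
        put_map(vec);
        return ret;
    }

    int main(void) { return sendto_like(); }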
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e132509d1db0..19ddc382b777 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,70 +84,37 @@ struct svc_xprt_class svc_rdma_class = {
84 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 84 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
85}; 85};
86 86
87static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) 87/* WR context cache. Created in svc_rdma.c */
88extern struct kmem_cache *svc_rdma_ctxt_cachep;
89
90struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
88{ 91{
89 int target;
90 int at_least_one = 0;
91 struct svc_rdma_op_ctxt *ctxt; 92 struct svc_rdma_op_ctxt *ctxt;
92 93
93 target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, 94 while (1) {
94 xprt->sc_ctxt_max); 95 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
95 96 if (ctxt)
96 spin_lock_bh(&xprt->sc_ctxt_lock);
97 while (xprt->sc_ctxt_cnt < target) {
98 xprt->sc_ctxt_cnt++;
99 spin_unlock_bh(&xprt->sc_ctxt_lock);
100
101 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
102
103 spin_lock_bh(&xprt->sc_ctxt_lock);
104 if (ctxt) {
105 at_least_one = 1;
106 INIT_LIST_HEAD(&ctxt->free_list);
107 list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
108 } else {
109 /* kmalloc failed...give up for now */
110 xprt->sc_ctxt_cnt--;
111 break; 97 break;
112 } 98 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
113 } 99 }
114 spin_unlock_bh(&xprt->sc_ctxt_lock); 100 ctxt->xprt = xprt;
115 dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", 101 INIT_LIST_HEAD(&ctxt->dto_q);
116 xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); 102 ctxt->count = 0;
117 return at_least_one; 103 atomic_inc(&xprt->sc_ctxt_used);
104 return ctxt;
118} 105}
119 106
120struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 107static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
121{ 108{
122 struct svc_rdma_op_ctxt *ctxt; 109 struct svcxprt_rdma *xprt = ctxt->xprt;
123 110 int i;
124 while (1) { 111 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
125 spin_lock_bh(&xprt->sc_ctxt_lock); 112 atomic_dec(&xprt->sc_dma_used);
126 if (unlikely(list_empty(&xprt->sc_ctxt_free))) { 113 ib_dma_unmap_single(xprt->sc_cm_id->device,
127 /* Try to bump my cache. */ 114 ctxt->sge[i].addr,
128 spin_unlock_bh(&xprt->sc_ctxt_lock); 115 ctxt->sge[i].length,
129 116 ctxt->direction);
130 if (rdma_bump_context_cache(xprt))
131 continue;
132
133 printk(KERN_INFO "svcrdma: sleeping waiting for "
134 "context memory on xprt=%p\n",
135 xprt);
136 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
137 continue;
138 }
139 ctxt = list_entry(xprt->sc_ctxt_free.next,
140 struct svc_rdma_op_ctxt,
141 free_list);
142 list_del_init(&ctxt->free_list);
143 spin_unlock_bh(&xprt->sc_ctxt_lock);
144 ctxt->xprt = xprt;
145 INIT_LIST_HEAD(&ctxt->dto_q);
146 ctxt->count = 0;
147 atomic_inc(&xprt->sc_ctxt_used);
148 break;
149 } 117 }
150 return ctxt;
151} 118}
152 119
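svc_rdma_unmap_dma() walks the SGE array until it hits a zero-length entry, the terminator that send_reply() writes when there are more response pages than SGEs, and decrements sc_dma_used once per mapping so the teardown WARN_ON() can catch leaks. A toy model of that bookkeeping (dma_used stands in for the atomic counter):

    #include <assert.h>
    #include <stdio.h>

    struct sge { unsigned long addr; unsigned length; };

    static int dma_used; /* models atomic_t sc_dma_used */

    static void map_one(struct sge *s, unsigned len)
    {
        s->addr = 0x1000; /* pretend mapping */
        s->length = len;
        dma_used++;       /* models atomic_inc(&xprt->sc_dma_used) */
    }

    /* Unmap up to `count` entries, but stop at a zero-length SGE:
     * entries past the terminator were never mapped. */
    static void unmap_dma(struct sge *sge, int count)
    {
        int i;

        for (i = 0; i < count && sge[i].length; i++)
            dma_used--;
    }

    int main(void)
    {
        struct sge sge[4] = { { 0, 0 } };

        map_one(&sge[0], 64);
        map_one(&sge[1], 64);
        sge[2].length = 0;     /* terminator: slots 2..3 unmapped */
        unmap_dma(sge, 4);
        assert(dma_used == 0); /* teardown leak check, like WARN_ON() */
        printf("dma_used=%d\n", dma_used);
        return 0;
    }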
153void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) 120void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -161,18 +128,36 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
161 for (i = 0; i < ctxt->count; i++) 128 for (i = 0; i < ctxt->count; i++)
162 put_page(ctxt->pages[i]); 129 put_page(ctxt->pages[i]);
163 130
164 for (i = 0; i < ctxt->count; i++) 131 kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
165 ib_dma_unmap_single(xprt->sc_cm_id->device,
166 ctxt->sge[i].addr,
167 ctxt->sge[i].length,
168 ctxt->direction);
169
170 spin_lock_bh(&xprt->sc_ctxt_lock);
171 list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
172 spin_unlock_bh(&xprt->sc_ctxt_lock);
173 atomic_dec(&xprt->sc_ctxt_used); 132 atomic_dec(&xprt->sc_ctxt_used);
174} 133}
175 134
135/* Temporary NFS request map cache. Created in svc_rdma.c */
136extern struct kmem_cache *svc_rdma_map_cachep;
137
138/*
139 * Temporary NFS req mappings are shared across all transport
 140 * instances. These are short-lived and should be bounded by the number
141 * of concurrent server threads * depth of the SQ.
142 */
143struct svc_rdma_req_map *svc_rdma_get_req_map(void)
144{
145 struct svc_rdma_req_map *map;
146 while (1) {
147 map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
148 if (map)
149 break;
150 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
151 }
152 map->count = 0;
153 return map;
154}
155
156void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
157{
158 kmem_cache_free(svc_rdma_map_cachep, map);
159}
160
176/* ib_cq event handler */ 161/* ib_cq event handler */
177static void cq_event_handler(struct ib_event *event, void *context) 162static void cq_event_handler(struct ib_event *event, void *context)
178{ 163{
@@ -302,6 +287,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
302 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 287 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
303 ctxt->wc_status = wc.status; 288 ctxt->wc_status = wc.status;
304 ctxt->byte_len = wc.byte_len; 289 ctxt->byte_len = wc.byte_len;
290 svc_rdma_unmap_dma(ctxt);
305 if (wc.status != IB_WC_SUCCESS) { 291 if (wc.status != IB_WC_SUCCESS) {
306 /* Close the transport */ 292 /* Close the transport */
307 dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); 293 dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
@@ -351,6 +337,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
351 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 337 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
352 xprt = ctxt->xprt; 338 xprt = ctxt->xprt;
353 339
340 svc_rdma_unmap_dma(ctxt);
354 if (wc.status != IB_WC_SUCCESS) 341 if (wc.status != IB_WC_SUCCESS)
355 /* Close the transport */ 342 /* Close the transport */
356 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 343 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -361,10 +348,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
361 348
362 switch (ctxt->wr_op) { 349 switch (ctxt->wr_op) {
363 case IB_WR_SEND: 350 case IB_WR_SEND:
364 case IB_WR_RDMA_WRITE:
365 svc_rdma_put_context(ctxt, 1); 351 svc_rdma_put_context(ctxt, 1);
366 break; 352 break;
367 353
354 case IB_WR_RDMA_WRITE:
355 svc_rdma_put_context(ctxt, 0);
356 break;
357
368 case IB_WR_RDMA_READ: 358 case IB_WR_RDMA_READ:
369 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { 359 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
370 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; 360 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
@@ -423,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
423 tasklet_schedule(&dto_tasklet); 413 tasklet_schedule(&dto_tasklet);
424} 414}
425 415
426static void create_context_cache(struct svcxprt_rdma *xprt,
427 int ctxt_count, int ctxt_bump, int ctxt_max)
428{
429 struct svc_rdma_op_ctxt *ctxt;
430 int i;
431
432 xprt->sc_ctxt_max = ctxt_max;
433 xprt->sc_ctxt_bump = ctxt_bump;
434 xprt->sc_ctxt_cnt = 0;
435 atomic_set(&xprt->sc_ctxt_used, 0);
436
437 INIT_LIST_HEAD(&xprt->sc_ctxt_free);
438 for (i = 0; i < ctxt_count; i++) {
439 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
440 if (ctxt) {
441 INIT_LIST_HEAD(&ctxt->free_list);
442 list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
443 xprt->sc_ctxt_cnt++;
444 }
445 }
446}
447
448static void destroy_context_cache(struct svcxprt_rdma *xprt)
449{
450 while (!list_empty(&xprt->sc_ctxt_free)) {
451 struct svc_rdma_op_ctxt *ctxt;
452 ctxt = list_entry(xprt->sc_ctxt_free.next,
453 struct svc_rdma_op_ctxt,
454 free_list);
455 list_del_init(&ctxt->free_list);
456 kfree(ctxt);
457 }
458}
459
460static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, 416static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
461 int listener) 417 int listener)
462{ 418{
@@ -473,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
473 429
474 spin_lock_init(&cma_xprt->sc_lock); 430 spin_lock_init(&cma_xprt->sc_lock);
475 spin_lock_init(&cma_xprt->sc_read_complete_lock); 431 spin_lock_init(&cma_xprt->sc_read_complete_lock);
476 spin_lock_init(&cma_xprt->sc_ctxt_lock);
477 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 432 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
478 433
479 cma_xprt->sc_ord = svcrdma_ord; 434 cma_xprt->sc_ord = svcrdma_ord;
@@ -482,21 +437,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
482 cma_xprt->sc_max_requests = svcrdma_max_requests; 437 cma_xprt->sc_max_requests = svcrdma_max_requests;
483 cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; 438 cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
484 atomic_set(&cma_xprt->sc_sq_count, 0); 439 atomic_set(&cma_xprt->sc_sq_count, 0);
440 atomic_set(&cma_xprt->sc_ctxt_used, 0);
485 441
486 if (!listener) { 442 if (listener)
487 int reqs = cma_xprt->sc_max_requests;
488 create_context_cache(cma_xprt,
489 reqs << 1, /* starting size */
490 reqs, /* bump amount */
491 reqs +
492 cma_xprt->sc_sq_depth +
493 RPCRDMA_MAX_THREADS + 1); /* max */
494 if (list_empty(&cma_xprt->sc_ctxt_free)) {
495 kfree(cma_xprt);
496 return NULL;
497 }
498 clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
499 } else
500 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 443 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
501 444
502 return cma_xprt; 445 return cma_xprt;
@@ -532,6 +475,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
532 BUG_ON(sge_no >= xprt->sc_max_sge); 475 BUG_ON(sge_no >= xprt->sc_max_sge);
533 page = svc_rdma_get_page(); 476 page = svc_rdma_get_page();
534 ctxt->pages[sge_no] = page; 477 ctxt->pages[sge_no] = page;
478 atomic_inc(&xprt->sc_dma_used);
535 pa = ib_dma_map_page(xprt->sc_cm_id->device, 479 pa = ib_dma_map_page(xprt->sc_cm_id->device,
536 page, 0, PAGE_SIZE, 480 page, 0, PAGE_SIZE,
537 DMA_FROM_DEVICE); 481 DMA_FROM_DEVICE);
@@ -566,7 +510,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
566 * will call the recvfrom method on the listen xprt which will accept the new 510 * will call the recvfrom method on the listen xprt which will accept the new
567 * connection. 511 * connection.
568 */ 512 */
569static void handle_connect_req(struct rdma_cm_id *new_cma_id) 513static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
570{ 514{
571 struct svcxprt_rdma *listen_xprt = new_cma_id->context; 515 struct svcxprt_rdma *listen_xprt = new_cma_id->context;
572 struct svcxprt_rdma *newxprt; 516 struct svcxprt_rdma *newxprt;
@@ -583,6 +527,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
583 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", 527 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
584 newxprt, newxprt->sc_cm_id, listen_xprt); 528 newxprt, newxprt->sc_cm_id, listen_xprt);
585 529
530 /* Save client advertised inbound read limit for use later in accept. */
531 newxprt->sc_ord = client_ird;
532
586 /* Set the local and remote addresses in the transport */ 533 /* Set the local and remote addresses in the transport */
587 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 534 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
588 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); 535 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
@@ -619,7 +566,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
619 case RDMA_CM_EVENT_CONNECT_REQUEST: 566 case RDMA_CM_EVENT_CONNECT_REQUEST:
620 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 567 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
621 "event=%d\n", cma_id, cma_id->context, event->event); 568 "event=%d\n", cma_id, cma_id->context, event->event);
622 handle_connect_req(cma_id); 569 handle_connect_req(cma_id,
570 event->param.conn.responder_resources);
623 break; 571 break;
624 572
625 case RDMA_CM_EVENT_ESTABLISHED: 573 case RDMA_CM_EVENT_ESTABLISHED:
@@ -793,8 +741,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
793 (size_t)svcrdma_max_requests); 741 (size_t)svcrdma_max_requests);
794 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; 742 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
795 743
796 newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, 744 /*
797 (size_t)svcrdma_ord); 745 * Limit ORD based on client limit, local device limit, and
746 * configured svcrdma limit.
747 */
748 newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
749 newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
798 750
799 newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); 751 newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
800 if (IS_ERR(newxprt->sc_pd)) { 752 if (IS_ERR(newxprt->sc_pd)) {
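The accepted transport's ORD is now the minimum of three values: the client's advertised IRD from the connect request, the device's max_qp_rd_atom, and the configured svcrdma_ord. A trivial check of the clamping order (the sample numbers are made up):

    #include <stdio.h>

    static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

    int main(void)
    {
        size_t client_ird  = 32; /* from RDMA_CM_EVENT_CONNECT_REQUEST */
        size_t dev_rd_atom = 16; /* devattr.max_qp_rd_atom */
        size_t svcrdma_ord = 4;  /* configured module limit */

        /* Successive clamps, as in svc_rdma_accept() after this patch. */
        size_t ord = client_ird;
        ord = min_sz(dev_rd_atom, ord);
        ord = min_sz(svcrdma_ord, ord);
        printf("ord=%zu\n", ord); /* 4 */
        return 0;
    }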
@@ -987,7 +939,6 @@ static void __svc_rdma_free(struct work_struct *work)
987 * cm_id because the device ptr is needed to unmap the dma in 939 * cm_id because the device ptr is needed to unmap the dma in
988 * svc_rdma_put_context. 940 * svc_rdma_put_context.
989 */ 941 */
990 spin_lock_bh(&rdma->sc_read_complete_lock);
991 while (!list_empty(&rdma->sc_read_complete_q)) { 942 while (!list_empty(&rdma->sc_read_complete_q)) {
992 struct svc_rdma_op_ctxt *ctxt; 943 struct svc_rdma_op_ctxt *ctxt;
993 ctxt = list_entry(rdma->sc_read_complete_q.next, 944 ctxt = list_entry(rdma->sc_read_complete_q.next,
@@ -996,10 +947,8 @@ static void __svc_rdma_free(struct work_struct *work)
996 list_del_init(&ctxt->dto_q); 947 list_del_init(&ctxt->dto_q);
997 svc_rdma_put_context(ctxt, 1); 948 svc_rdma_put_context(ctxt, 1);
998 } 949 }
999 spin_unlock_bh(&rdma->sc_read_complete_lock);
1000 950
1001 /* Destroy queued, but not processed recv completions */ 951 /* Destroy queued, but not processed recv completions */
1002 spin_lock_bh(&rdma->sc_rq_dto_lock);
1003 while (!list_empty(&rdma->sc_rq_dto_q)) { 952 while (!list_empty(&rdma->sc_rq_dto_q)) {
1004 struct svc_rdma_op_ctxt *ctxt; 953 struct svc_rdma_op_ctxt *ctxt;
1005 ctxt = list_entry(rdma->sc_rq_dto_q.next, 954 ctxt = list_entry(rdma->sc_rq_dto_q.next,
@@ -1008,10 +957,10 @@ static void __svc_rdma_free(struct work_struct *work)
1008 list_del_init(&ctxt->dto_q); 957 list_del_init(&ctxt->dto_q);
1009 svc_rdma_put_context(ctxt, 1); 958 svc_rdma_put_context(ctxt, 1);
1010 } 959 }
1011 spin_unlock_bh(&rdma->sc_rq_dto_lock);
1012 960
1013 /* Warn if we leaked a resource or under-referenced */ 961 /* Warn if we leaked a resource or under-referenced */
1014 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); 962 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
963 WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
1015 964
1016 /* Destroy the QP if present (not a listener) */ 965 /* Destroy the QP if present (not a listener) */
1017 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 966 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1032,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
1032 /* Destroy the CM ID */ 981 /* Destroy the CM ID */
1033 rdma_destroy_id(rdma->sc_cm_id); 982 rdma_destroy_id(rdma->sc_cm_id);
1034 983
1035 destroy_context_cache(rdma);
1036 kfree(rdma); 984 kfree(rdma);
1037} 985}
1038 986
@@ -1132,6 +1080,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1132 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1080 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1133 1081
1134 /* Prepare SGE for local address */ 1082 /* Prepare SGE for local address */
1083 atomic_inc(&xprt->sc_dma_used);
1135 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, 1084 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
1136 p, 0, PAGE_SIZE, DMA_FROM_DEVICE); 1085 p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1137 sge.lkey = xprt->sc_phys_mr->lkey; 1086 sge.lkey = xprt->sc_phys_mr->lkey;