-rw-r--r--  Documentation/filesystems/nfs/nfs-rdma.txt  |  16
-rw-r--r--  fs/lockd/svc.c                              |   8
-rw-r--r--  fs/locks.c                                  |   1
-rw-r--r--  fs/nfs/callback.c                           |   6
-rw-r--r--  fs/nfs_common/grace.c                       |  23
-rw-r--r--  fs/nfsd/export.c                            |  73
-rw-r--r--  fs/nfsd/export.h                            |   1
-rw-r--r--  fs/nfsd/idmap.h                             |   4
-rw-r--r--  fs/nfsd/netns.h                             |   1
-rw-r--r--  fs/nfsd/nfs2acl.c                           |  10
-rw-r--r--  fs/nfsd/nfs3acl.c                           |   4
-rw-r--r--  fs/nfsd/nfs4acl.c                           |   8
-rw-r--r--  fs/nfsd/nfs4callback.c                      | 122
-rw-r--r--  fs/nfsd/nfs4idmap.c                         |   3
-rw-r--r--  fs/nfsd/nfs4proc.c                          |  30
-rw-r--r--  fs/nfsd/nfs4recover.c                       |  18
-rw-r--r--  fs/nfsd/nfs4state.c                         | 180
-rw-r--r--  fs/nfsd/nfs4xdr.c                           | 158
-rw-r--r--  fs/nfsd/nfssvc.c                            |  17
-rw-r--r--  fs/nfsd/state.h                             |   2
-rw-r--r--  fs/nfsd/vfs.c                               |   6
-rw-r--r--  fs/nfsd/vfs.h                               |   6
-rw-r--r--  include/linux/fs.h                          |   6
-rw-r--r--  include/linux/sunrpc/cache.h                |   9
-rw-r--r--  include/linux/sunrpc/svc.h                  |  68
-rw-r--r--  include/linux/sunrpc/svc_rdma.h             |  91
-rw-r--r--  include/linux/sunrpc/svc_xprt.h             |   1
-rw-r--r--  include/trace/events/sunrpc.h               |  21
-rw-r--r--  include/uapi/linux/nfsacl.h                 |   1
-rw-r--r--  net/sunrpc/cache.c                          | 103
-rw-r--r--  net/sunrpc/svc.c                            | 113
-rw-r--r--  net/sunrpc/svc_xprt.c                       |  10
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c       |  83
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c    |  37
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h             |   1
35 files changed, 669 insertions, 572 deletions
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 95c13aa575ff..906b6c233f62 100644
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -138,9 +138,9 @@ Installation
   - Build, install, reboot
 
     The NFS/RDMA code will be enabled automatically if NFS and RDMA
-    are turned on. The NFS/RDMA client and server are configured via the
-    SUNRPC_XPRT_RDMA_CLIENT and SUNRPC_XPRT_RDMA_SERVER config options that both
-    depend on SUNRPC and INFINIBAND. The default value of both options will be:
+    are turned on. The NFS/RDMA client and server are configured via the hidden
+    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+    value of SUNRPC_XPRT_RDMA will be:
 
   - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
     and server will not be built
@@ -238,9 +238,8 @@ NFS/RDMA Setup
 
   - Start the NFS server
 
-    If the NFS/RDMA server was built as a module
-    (CONFIG_SUNRPC_XPRT_RDMA_SERVER=m in kernel config), load the RDMA
-    transport module:
+    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA transport module:
 
     $ modprobe svcrdma
 
@@ -259,9 +258,8 @@ NFS/RDMA Setup
 
   - On the client system
 
-    If the NFS/RDMA client was built as a module
-    (CONFIG_SUNRPC_XPRT_RDMA_CLIENT=m in kernel config), load the RDMA client
-    module:
+    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA client module:
 
     $ modprobe xprtrdma.ko
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 55505cbe11af..d678bcc3cbcb 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -322,6 +322,11 @@ out_rqst:
 	return error;
 }
 
+static struct svc_serv_ops lockd_sv_ops = {
+	.svo_shutdown		= svc_rpcb_cleanup,
+	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
+};
+
 static struct svc_serv *lockd_create_svc(void)
 {
 	struct svc_serv *serv;
@@ -350,7 +355,7 @@ static struct svc_serv *lockd_create_svc(void)
 		nlm_timeout = LOCKD_DFLT_TIMEO;
 	nlmsvc_timeout = nlm_timeout * HZ;
 
-	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup);
+	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
 	if (!serv) {
 		printk(KERN_WARNING "lockd_up: create service failed\n");
 		return ERR_PTR(-ENOMEM);
@@ -586,6 +591,7 @@ static int lockd_init_net(struct net *net)
 
 	INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
 	INIT_LIST_HEAD(&ln->lockd_manager.list);
+	ln->lockd_manager.block_opens = false;
 	spin_lock_init(&ln->nsm_clnt_lock);
 	return 0;
 }
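
The hunks above show the pattern this series applies to every RPC service: svc_create() no longer takes a bare shutdown callback, it takes a struct svc_serv_ops bundling the per-service callbacks. A minimal sketch of the new calling convention for a hypothetical "foo" service (foo_program and FOO_BUFSIZE are placeholders, not symbols from this patch):

    static struct svc_serv_ops foo_sv_ops = {
            .svo_shutdown           = svc_rpcb_cleanup,     /* optional cleanup hook */
            .svo_enqueue_xprt       = svc_xprt_do_enqueue,  /* how to queue transports */
    };

    static struct svc_serv *foo_create_svc(void)
    {
            return svc_create(&foo_program, FOO_BUFSIZE, &foo_sv_ops);
    }

Services that need no shutdown hook simply omit .svo_shutdown, as the nfs_cb_sv_ops instance in the fs/nfs/callback.c hunk further down does.
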
diff --git a/fs/locks.c b/fs/locks.c
index d3d558ba4da7..2a54c800a223 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1568,6 +1568,7 @@ int fcntl_getlease(struct file *filp)
  * desired lease.
  * @dentry:	dentry to check
  * @arg:	type of lease that we're trying to acquire
+ * @flags:	current lock flags
  *
  * Check to see if there's an existing open fd on this file that would
  * conflict with the lease we're trying to set.
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 682529c00996..2c4a0b565d28 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -308,6 +308,10 @@ err_bind:
 	return ret;
 }
 
+static struct svc_serv_ops nfs_cb_sv_ops = {
+	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
+};
+
 static struct svc_serv *nfs_callback_create_svc(int minorversion)
 {
 	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
@@ -333,7 +337,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 		printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
 			cb_info->users);
 
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops);
 	if (!serv) {
 		printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index ae6e58ea4de5..fd8c9a5bcac4 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -63,14 +63,33 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
  * lock reclaims.
  */
 int
-locks_in_grace(struct net *net)
+__state_in_grace(struct net *net, bool open)
 {
 	struct list_head *grace_list = net_generic(net, grace_net_id);
+	struct lock_manager *lm;
 
-	return !list_empty(grace_list);
+	if (!open)
+		return !list_empty(grace_list);
+
+	list_for_each_entry(lm, grace_list, list) {
+		if (lm->block_opens)
+			return true;
+	}
+	return false;
+}
+
+int locks_in_grace(struct net *net)
+{
+	return __state_in_grace(net, 0);
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
 
+int opens_in_grace(struct net *net)
+{
+	return __state_in_grace(net, 1);
+}
+EXPORT_SYMBOL_GPL(opens_in_grace);
+
 static int __net_init
 grace_init_net(struct net *net)
 {
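
The new block_opens flag is what separates the two predicates: locks_in_grace() stays true while any lock manager on the net's grace list is in its grace period, while opens_in_grace() only reports managers that explicitly asked to block opens. A hedged sketch of how a caller sees the difference (the helper name is hypothetical; nfserr_grace/nfs_ok are used as elsewhere in this series):

    static __be32 foo_grace_check(struct net *net, bool is_open)
    {
            /* lockd's manager leaves block_opens false, so a pure NLM
             * grace period delays lock reclaims but no longer blocks
             * NFSv4 OPENs; nfsd sets block_opens = true in
             * nfs4_state_start_net() (see the fs/nfsd/nfs4state.c
             * hunks below), so it blocks both. */
            if (is_open ? opens_in_grace(net) : locks_in_grace(net))
                    return nfserr_grace;
            return nfs_ok;
    }
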
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index f79521a59747..b4d84b579f20 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1075,73 +1075,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
 	return rv;
 }
 
-/* Iterator */
-
-static void *e_start(struct seq_file *m, loff_t *pos)
-	__acquires(((struct cache_detail *)m->private)->hash_lock)
-{
-	loff_t n = *pos;
-	unsigned hash, export;
-	struct cache_head *ch;
-	struct cache_detail *cd = m->private;
-	struct cache_head **export_table = cd->hash_table;
-
-	read_lock(&cd->hash_lock);
-	if (!n--)
-		return SEQ_START_TOKEN;
-	hash = n >> 32;
-	export = n & ((1LL<<32) - 1);
-
-
-	for (ch=export_table[hash]; ch; ch=ch->next)
-		if (!export--)
-			return ch;
-	n &= ~((1LL<<32) - 1);
-	do {
-		hash++;
-		n += 1LL<<32;
-	} while(hash < EXPORT_HASHMAX && export_table[hash]==NULL);
-	if (hash >= EXPORT_HASHMAX)
-		return NULL;
-	*pos = n+1;
-	return export_table[hash];
-}
-
-static void *e_next(struct seq_file *m, void *p, loff_t *pos)
-{
-	struct cache_head *ch = p;
-	int hash = (*pos >> 32);
-	struct cache_detail *cd = m->private;
-	struct cache_head **export_table = cd->hash_table;
-
-	if (p == SEQ_START_TOKEN)
-		hash = 0;
-	else if (ch->next == NULL) {
-		hash++;
-		*pos += 1LL<<32;
-	} else {
-		++*pos;
-		return ch->next;
-	}
-	*pos &= ~((1LL<<32) - 1);
-	while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) {
-		hash++;
-		*pos += 1LL<<32;
-	}
-	if (hash >= EXPORT_HASHMAX)
-		return NULL;
-	++*pos;
-	return export_table[hash];
-}
-
-static void e_stop(struct seq_file *m, void *p)
-	__releases(((struct cache_detail *)m->private)->hash_lock)
-{
-	struct cache_detail *cd = m->private;
-
-	read_unlock(&cd->hash_lock);
-}
-
 static struct flags {
 	int flag;
 	char *name[2];
@@ -1270,9 +1203,9 @@ static int e_show(struct seq_file *m, void *p)
 }
 
 const struct seq_operations nfs_exports_op = {
-	.start	= e_start,
-	.next	= e_next,
-	.stop	= e_stop,
+	.start	= cache_seq_start,
+	.next	= cache_seq_next,
+	.stop	= cache_seq_stop,
 	.show	= e_show,
 };
 
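
The removed e_start/e_next/e_stop trio was an open-coded walk of the export cache's hash table under cd->hash_lock. The replacement helpers are generic versions of the same walk, added by this series to net/sunrpc/cache.c and declared in include/linux/sunrpc/cache.h (both touched in the diffstat). Presumably any cache_detail-backed seq_file can reuse them the same way, with only the show callback staying cache-specific; a sketch with a hypothetical foo_show:

    static int foo_show(struct seq_file *m, void *p)
    {
            if (p == SEQ_START_TOKEN) {     /* emitted once at the start */
                    seq_puts(m, "# header\n");
                    return 0;
            }
            /* p is a struct cache_head * from the cache's hash table */
            return 0;
    }

    static const struct seq_operations foo_seq_ops = {
            .start  = cache_seq_start,      /* takes cd->hash_lock, seeks *pos */
            .next   = cache_seq_next,       /* chain first, then later buckets */
            .stop   = cache_seq_stop,       /* drops cd->hash_lock */
            .show   = foo_show,
    };
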
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 1f52bfcc436f..2e315072bf3f 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -6,6 +6,7 @@
 
 #include <linux/sunrpc/cache.h>
 #include <uapi/linux/nfsd/export.h>
+#include <linux/nfs4.h>
 
 struct knfsd_fh;
 struct svc_fh;
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index a3f34900091f..23cc85d1efdd 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -37,9 +37,7 @@
 
 #include <linux/in.h>
 #include <linux/sunrpc/svc.h>
-
-/* XXX from linux/nfs_idmap.h */
-#define IDMAP_NAMESZ 128
+#include <linux/nfs_idmap.h>
 
 #ifdef CONFIG_NFSD_V4
 int nfsd_idmap_init(struct net *);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ea6749a32760..d8b16c2568f3 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -110,6 +110,7 @@ struct nfsd_net {
 	unsigned int max_connections;
 
 	u32 clientid_counter;
+	u32 clverifier_counter;
 
 	struct svc_serv *nfsd_serv;
 };
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index d54701f6dc78..1580ea6fd64d 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -44,13 +44,13 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 
 	inode = d_inode(fh->fh_dentry);
 
-	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+	if (argp->mask & ~NFS_ACL_MASK)
 		RETURN_STATUS(nfserr_inval);
 	resp->mask = argp->mask;
 
 	nfserr = fh_getattr(fh, &resp->stat);
 	if (nfserr)
-		goto fail;
+		RETURN_STATUS(nfserr);
 
 	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
 		acl = get_acl(inode, ACL_TYPE_ACCESS);
@@ -202,7 +202,7 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	if (!p)
 		return 0;
 	argp->mask = ntohl(*p++);
-	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	if (argp->mask & ~NFS_ACL_MASK ||
 	    !xdr_argsize_check(rqstp, p))
 		return 0;
 
@@ -293,9 +293,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 				  resp->acl_default,
 				  resp->mask & NFS_DFACL,
 				  NFS_ACL_DEFAULT);
-	if (n <= 0)
-		return 0;
-	return 1;
+	return (n > 0);
 }
 
 static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
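
NFS_ACL_MASK replaces the open-coded four-flag expression in both the v2 and v3 ACL code. The macro itself lands in include/uapi/linux/nfsacl.h (the one-line addition in the diffstat); its assumed definition is simply the union of the existing mask bits:

    /* assumed shape of the new uapi macro: */
    #define NFS_ACL_MASK    (NFS_ACL | NFS_ACLCNT | NFS_DFACL | NFS_DFACLCNT)

so checks like argp->mask & ~NFS_ACL_MASK keep rejecting unknown mask bits exactly as before.
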
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 882b1a14bc3e..01df4cd7c753 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -41,7 +41,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 
 	inode = d_inode(fh->fh_dentry);
 
-	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+	if (argp->mask & ~NFS_ACL_MASK)
 		RETURN_STATUS(nfserr_inval);
 	resp->mask = argp->mask;
 
@@ -148,7 +148,7 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	if (!p)
 		return 0;
 	args->mask = ntohl(*p++);
-	if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	if (args->mask & ~NFS_ACL_MASK ||
 	    !xdr_argsize_check(rqstp, p))
 		return 0;
 
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index eb5accf1b37f..6adabd6049b7 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -34,8 +34,10 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/fs.h>
 #include <linux/slab.h>
-#include <linux/nfs_fs.h>
+#include <linux/posix_acl.h>
+
 #include "nfsfh.h"
 #include "nfsd.h"
 #include "acl.h"
@@ -100,7 +102,7 @@ deny_mask_from_posix(unsigned short perm, u32 flags)
 /* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
  * side of being more restrictive, so the mode bit mapping below is
  * pessimistic.  An optimistic version would be needed to handle DENY's,
- * but we espect to coalesce all ALLOWs and DENYs before mapping to mode
+ * but we expect to coalesce all ALLOWs and DENYs before mapping to mode
  * bits. */
 
 static void
@@ -458,7 +460,7 @@ init_state(struct posix_acl_state *state, int cnt)
 	state->empty = 1;
 	/*
 	 * In the worst case, each individual acl could be for a distinct
-	 * named user or group, but we don't no which, so we allocate
+	 * named user or group, but we don't know which, so we allocate
	 * enough space for either:
 	 */
 	alloc = sizeof(struct posix_ace_state_array)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a49201835a97..e7f50c4081d6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -435,12 +435,12 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
 	 */
 	status = 0;
 out:
-	if (status)
-		nfsd4_mark_cb_fault(cb->cb_clp, status);
+	cb->cb_seq_status = status;
 	return status;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return -EIO;
+	status = -EIO;
+	goto out;
 }
 
 static int decode_cb_sequence4res(struct xdr_stream *xdr,
@@ -451,11 +451,10 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
 	if (cb->cb_minorversion == 0)
 		return 0;
 
-	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
-	if (unlikely(status || cb->cb_status))
+	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status);
+	if (unlikely(status || cb->cb_seq_status))
 		return status;
 
-	cb->cb_update_seq_nr = true;
 	return decode_cb_sequence4resok(xdr, cb);
 }
 
@@ -527,7 +526,7 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 
 	if (cb != NULL) {
 		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status || cb->cb_status))
+		if (unlikely(status || cb->cb_seq_status))
 			return status;
 	}
 
@@ -617,7 +616,7 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 
 	if (cb) {
 		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status || cb->cb_status))
+		if (unlikely(status || cb->cb_seq_status))
 			return status;
 	}
 	return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
@@ -876,7 +875,11 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 	u32 minorversion = clp->cl_minorversion;
 
 	cb->cb_minorversion = minorversion;
-	cb->cb_update_seq_nr = false;
+	/*
+	 * cb_seq_status is only set in decode_cb_sequence4res,
+	 * and so will remain 1 if an rpc level failure occurs.
+	 */
+	cb->cb_seq_status = 1;
 	cb->cb_status = 0;
 	if (minorversion) {
 		if (!nfsd41_cb_get_slot(clp, task))
@@ -885,15 +888,30 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 	rpc_call_start(task);
 }
 
-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
 {
-	struct nfsd4_callback *cb = calldata;
 	struct nfs4_client *clp = cb->cb_clp;
+	struct nfsd4_session *session = clp->cl_cb_session;
+	bool ret = true;
 
-	dprintk("%s: minorversion=%d\n", __func__,
-		clp->cl_minorversion);
+	if (!clp->cl_minorversion) {
+		/*
+		 * If the backchannel connection was shut down while this
+		 * task was queued, we need to resubmit it after setting up
+		 * a new backchannel connection.
+		 *
+		 * Note that if we lost our callback connection permanently
+		 * the submission code will error out, so we don't need to
+		 * handle that case here.
+		 */
+		if (task->tk_flags & RPC_TASK_KILLED)
+			goto need_restart;
+
+		return true;
+	}
 
-	if (clp->cl_minorversion) {
+	switch (cb->cb_seq_status) {
+	case 0:
 		/*
 		 * No need for lock, access serialized in nfsd4_cb_prepare
 		 *
@@ -901,29 +919,63 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 		 * If CB_SEQUENCE returns an error, then the state of the slot
 		 * (sequence ID, cached reply) MUST NOT change.
 		 */
-		if (cb->cb_update_seq_nr)
-			++clp->cl_cb_session->se_cb_seq_nr;
-
-		clear_bit(0, &clp->cl_cb_slot_busy);
-		rpc_wake_up_next(&clp->cl_cb_waitq);
-		dprintk("%s: freed slot, new seqid=%d\n", __func__,
-			clp->cl_cb_session->se_cb_seq_nr);
+		++session->se_cb_seq_nr;
+		break;
+	case -ESERVERFAULT:
+		++session->se_cb_seq_nr;
+	case 1:
+	case -NFS4ERR_BADSESSION:
+		nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
+		ret = false;
+		break;
+	case -NFS4ERR_DELAY:
+		if (!rpc_restart_call(task))
+			goto out;
+
+		rpc_delay(task, 2 * HZ);
+		return false;
+	case -NFS4ERR_BADSLOT:
+		goto retry_nowait;
+	case -NFS4ERR_SEQ_MISORDERED:
+		if (session->se_cb_seq_nr != 1) {
+			session->se_cb_seq_nr = 1;
+			goto retry_nowait;
+		}
+		break;
+	default:
+		dprintk("%s: unprocessed error %d\n", __func__,
+			cb->cb_seq_status);
 	}
 
-	/*
-	 * If the backchannel connection was shut down while this
-	 * task was queued, we need to resubmit it after setting up
-	 * a new backchannel connection.
-	 *
-	 * Note that if we lost our callback connection permanently
-	 * the submission code will error out, so we don't need to
-	 * handle that case here.
-	 */
-	if (task->tk_flags & RPC_TASK_KILLED) {
-		task->tk_status = 0;
-		cb->cb_need_restart = true;
+	clear_bit(0, &clp->cl_cb_slot_busy);
+	rpc_wake_up_next(&clp->cl_cb_waitq);
+	dprintk("%s: freed slot, new seqid=%d\n", __func__,
+		clp->cl_cb_session->se_cb_seq_nr);
+
+	if (task->tk_flags & RPC_TASK_KILLED)
+		goto need_restart;
+out:
+	return ret;
+retry_nowait:
+	if (rpc_restart_call_prepare(task))
+		ret = false;
+	goto out;
+need_restart:
+	task->tk_status = 0;
+	cb->cb_need_restart = true;
+	return false;
+}
+
+static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+{
+	struct nfsd4_callback *cb = calldata;
+	struct nfs4_client *clp = cb->cb_clp;
+
+	dprintk("%s: minorversion=%d\n", __func__,
+		clp->cl_minorversion);
+
+	if (!nfsd4_cb_sequence_done(task, cb))
 		return;
-	}
 
 	if (cb->cb_status) {
 		WARN_ON_ONCE(task->tk_status);
@@ -1099,8 +1151,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 	cb->cb_msg.rpc_resp = cb;
 	cb->cb_ops = ops;
 	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
+	cb->cb_seq_status = 1;
 	cb->cb_status = 0;
-	cb->cb_update_seq_nr = false;
 	cb->cb_need_restart = false;
 }
 
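
Taken together, the hunks above give cb_seq_status a simple lifecycle, summarized here as a comment (a reading of this patch, not text from it):

    /*
     * cb_seq_status lifecycle:
     *
     *   nfsd4_init_cb()/nfsd4_cb_prepare():   cb_seq_status = 1
     *       sentinel meaning "no CB_SEQUENCE reply decoded yet", so an
     *       rpc-level failure is distinguishable from a decoded status
     *   decode_cb_sequence4res()/...4resok(): cb_seq_status = decoded result
     *   nfsd4_cb_sequence_done() consumes it:
     *       0                        bump se_cb_seq_nr, release the slot
     *       -ESERVERFAULT            bump the seqid, then fall through to...
     *       1, -NFS4ERR_BADSESSION   mark the callback path faulty
     *       -NFS4ERR_DELAY           rpc_delay(task, 2 * HZ) and restart
     *       -NFS4ERR_BADSLOT         retry immediately (retry_nowait)
     *       -NFS4ERR_SEQ_MISORDERED  reset se_cb_seq_nr to 1 and retry once
     */
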
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index e1b3d3d472da..5b20577dcdd2 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -59,9 +59,6 @@ MODULE_PARM_DESC(nfs4_disable_idmapping,
  * that.
  */
 
-#define IDMAP_TYPE_USER  0
-#define IDMAP_TYPE_GROUP 1
-
 struct ent {
 	struct cache_head h;
 	int type;		/* User / Group */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 90cfda75313c..4ce6b97b31ad 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -276,13 +276,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
 		nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
 
 		/*
-		 * Following rfc 3530 14.2.16, use the returned bitmask
-		 * to indicate which attributes we used to store the
-		 * verifier:
+		 * Following rfc 3530 14.2.16, and rfc 5661 18.16.4
+		 * use the returned bitmask to indicate which attributes
+		 * we used to store the verifier:
 		 */
-		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
-			open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
+		if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
+			open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
 						FATTR4_WORD1_TIME_MODIFY);
 	} else
 		/*
 		 * Note this may exit with the parent still locked.
@@ -362,7 +362,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	__be32 status;
 	struct svc_fh *resfh = NULL;
-	struct nfsd4_compoundres *resp;
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
@@ -389,8 +388,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	copy_clientid(&open->op_clientid, cstate->session);
 
 	/* check seqid for replay. set nfs4_owner */
-	resp = rqstp->rq_resp;
-	status = nfsd4_process_open1(&resp->cstate, open, nn);
+	status = nfsd4_process_open1(cstate, open, nn);
 	if (status == nfserr_replay_me) {
 		struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay;
 		fh_put(&cstate->current_fh);
@@ -417,10 +415,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* Openowner is now set, so sequence id will get bumped. Now we need
 	 * these checks before we do any creates: */
 	status = nfserr_grace;
-	if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+	if (opens_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+	if (!opens_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 
 	switch (open->op_claim_type) {
@@ -829,7 +827,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	__be32 status;
 
-	if (locks_in_grace(SVC_NET(rqstp)))
+	if (opens_in_grace(SVC_NET(rqstp)))
 		return nfserr_grace;
 	status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
 			     remove->rm_name, remove->rm_namelen);
@@ -848,7 +846,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	if (!cstate->save_fh.fh_dentry)
 		return status;
-	if (locks_in_grace(SVC_NET(rqstp)) &&
+	if (opens_in_grace(SVC_NET(rqstp)) &&
 		!(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
 		return nfserr_grace;
 	status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
@@ -1364,10 +1362,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
 		goto out;
 	}
 
-	nfserr = ops->proc_layoutcommit(inode, lcp);
-	if (nfserr)
-		goto out_put_stid;
-
 	if (new_size > i_size_read(inode)) {
 		lcp->lc_size_chg = 1;
 		lcp->lc_newsize = new_size;
@@ -1375,7 +1369,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
 		lcp->lc_size_chg = 0;
 	}
 
-out_put_stid:
+	nfserr = ops->proc_layoutcommit(inode, lcp);
 	nfs4_put_stid(&ls->ls_stid);
 out:
 	return nfserr;
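
Two details in the do_open_lookup() hunk above are easy to miss: nfsd_create_is_exclusive() presumably also covers the NFSv4.1 exclusive-create mode (matching the RFC 5661 reference added to the comment), and the plain assignment became |=. The difference matters because op_bmval[1] may already carry bits at that point; an illustrative fragment:

    u32 bmval1 = FATTR4_WORD1_MODE;    /* e.g. a bit recorded earlier */

    /* "=" would have discarded FATTR4_WORD1_MODE from the reply bitmap;
     * "|=" accumulates the verifier timestamps alongside it: */
    bmval1 |= (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY);
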
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index d88ea7b9a85c..e3d47091b191 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -272,6 +272,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 		.ctx.actor = nfsd4_build_namelist,
 		.names = LIST_HEAD_INIT(ctx.names)
 	};
+	struct name_list *entry, *tmp;
 	int status;
 
 	status = nfs4_save_creds(&original_cred);
@@ -286,9 +287,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 
 	status = iterate_dir(nn->rec_file, &ctx.ctx);
 	mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
-	while (!list_empty(&ctx.names)) {
-		struct name_list *entry;
-		entry = list_entry(ctx.names.next, struct name_list, list);
+
+	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
 		if (!status) {
 			struct dentry *dentry;
 			dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
@@ -304,6 +304,12 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 	}
 	mutex_unlock(&d_inode(dir)->i_mutex);
 	nfs4_reset_creds(original_cred);
+
+	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
+		dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name);
+		list_del(&entry->list);
+		kfree(entry);
+	}
 	return status;
 }
 
@@ -541,8 +547,7 @@ nfsd4_legacy_tracking_init(struct net *net)
 
 	/* XXX: The legacy code won't work in a container */
 	if (net != &init_net) {
-		WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client "
-			"tracking in a container!\n");
+		pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n");
 		return -EINVAL;
 	}
 
@@ -1254,8 +1259,7 @@ nfsd4_umh_cltrack_init(struct net *net)
 
 	/* XXX: The usermode helper s not working in container yet. */
 	if (net != &init_net) {
-		WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
-			"tracking in a container!\n");
+		pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
 		return -EINVAL;
 	}
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 95202719a1fd..0f1d5691b795 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -777,13 +777,16 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
 	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
 }
 
-static void
+static bool
 unhash_delegation_locked(struct nfs4_delegation *dp)
 {
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
 
 	lockdep_assert_held(&state_lock);
 
+	if (list_empty(&dp->dl_perfile))
+		return false;
+
 	dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
 	/* Ensure that deleg break won't try to requeue it */
 	++dp->dl_time;
@@ -792,16 +795,21 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
 	list_del_init(&dp->dl_recall_lru);
 	list_del_init(&dp->dl_perfile);
 	spin_unlock(&fp->fi_lock);
+	return true;
 }
 
 static void destroy_delegation(struct nfs4_delegation *dp)
 {
+	bool unhashed;
+
 	spin_lock(&state_lock);
-	unhash_delegation_locked(dp);
+	unhashed = unhash_delegation_locked(dp);
 	spin_unlock(&state_lock);
-	put_clnt_odstate(dp->dl_clnt_odstate);
-	nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-	nfs4_put_stid(&dp->dl_stid);
+	if (unhashed) {
+		put_clnt_odstate(dp->dl_clnt_odstate);
+		nfs4_put_deleg_lease(dp->dl_stid.sc_file);
+		nfs4_put_stid(&dp->dl_stid);
+	}
 }
 
 static void revoke_delegation(struct nfs4_delegation *dp)
@@ -990,6 +998,12 @@ release_all_access(struct nfs4_ol_stateid *stp)
 	}
 }
 
+static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop)
+{
+	kfree(sop->so_owner.data);
+	sop->so_ops->so_free(sop);
+}
+
 static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
 {
 	struct nfs4_client *clp = sop->so_client;
@@ -1000,20 +1014,23 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
 		return;
 	sop->so_ops->so_unhash(sop);
 	spin_unlock(&clp->cl_lock);
-	kfree(sop->so_owner.data);
-	sop->so_ops->so_free(sop);
+	nfs4_free_stateowner(sop);
 }
 
-static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
+static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
 {
 	struct nfs4_file *fp = stp->st_stid.sc_file;
 
 	lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
 
+	if (list_empty(&stp->st_perfile))
+		return false;
+
 	spin_lock(&fp->fi_lock);
-	list_del(&stp->st_perfile);
+	list_del_init(&stp->st_perfile);
 	spin_unlock(&fp->fi_lock);
 	list_del(&stp->st_perstateowner);
+	return true;
 }
 
 static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
@@ -1063,25 +1080,27 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 	list_add(&stp->st_locks, reaplist);
 }
 
-static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
+static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
 {
 	struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
 
 	lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
 
 	list_del_init(&stp->st_locks);
-	unhash_ol_stateid(stp);
 	nfs4_unhash_stid(&stp->st_stid);
+	return unhash_ol_stateid(stp);
 }
 
 static void release_lock_stateid(struct nfs4_ol_stateid *stp)
 {
 	struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
+	bool unhashed;
 
 	spin_lock(&oo->oo_owner.so_client->cl_lock);
-	unhash_lock_stateid(stp);
+	unhashed = unhash_lock_stateid(stp);
 	spin_unlock(&oo->oo_owner.so_client->cl_lock);
-	nfs4_put_stid(&stp->st_stid);
+	if (unhashed)
+		nfs4_put_stid(&stp->st_stid);
 }
 
 static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
@@ -1129,7 +1148,7 @@ static void release_lockowner(struct nfs4_lockowner *lo)
 	while (!list_empty(&lo->lo_owner.so_stateids)) {
 		stp = list_first_entry(&lo->lo_owner.so_stateids,
 				struct nfs4_ol_stateid, st_perstateowner);
-		unhash_lock_stateid(stp);
+		WARN_ON(!unhash_lock_stateid(stp));
 		put_ol_stateid_locked(stp, &reaplist);
 	}
 	spin_unlock(&clp->cl_lock);
@@ -1142,21 +1161,26 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
 {
 	struct nfs4_ol_stateid *stp;
 
+	lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock);
+
 	while (!list_empty(&open_stp->st_locks)) {
 		stp = list_entry(open_stp->st_locks.next,
 				struct nfs4_ol_stateid, st_locks);
-		unhash_lock_stateid(stp);
+		WARN_ON(!unhash_lock_stateid(stp));
 		put_ol_stateid_locked(stp, reaplist);
 	}
 }
 
-static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
+static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
 				struct list_head *reaplist)
 {
+	bool unhashed;
+
 	lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
 
-	unhash_ol_stateid(stp);
+	unhashed = unhash_ol_stateid(stp);
 	release_open_stateid_locks(stp, reaplist);
+	return unhashed;
 }
 
 static void release_open_stateid(struct nfs4_ol_stateid *stp)
@@ -1164,8 +1188,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
 	LIST_HEAD(reaplist);
 
 	spin_lock(&stp->st_stid.sc_client->cl_lock);
-	unhash_open_stateid(stp, &reaplist);
-	put_ol_stateid_locked(stp, &reaplist);
+	if (unhash_open_stateid(stp, &reaplist))
+		put_ol_stateid_locked(stp, &reaplist);
 	spin_unlock(&stp->st_stid.sc_client->cl_lock);
 	free_ol_stateid_reaplist(&reaplist);
 }
@@ -1210,8 +1234,8 @@ static void release_openowner(struct nfs4_openowner *oo)
 	while (!list_empty(&oo->oo_owner.so_stateids)) {
 		stp = list_first_entry(&oo->oo_owner.so_stateids,
 				struct nfs4_ol_stateid, st_perstateowner);
-		unhash_open_stateid(stp, &reaplist);
-		put_ol_stateid_locked(stp, &reaplist);
+		if (unhash_open_stateid(stp, &reaplist))
+			put_ol_stateid_locked(stp, &reaplist);
 	}
 	spin_unlock(&clp->cl_lock);
 	free_ol_stateid_reaplist(&reaplist);
@@ -1714,7 +1738,7 @@ __destroy_client(struct nfs4_client *clp)
 	spin_lock(&state_lock);
 	while (!list_empty(&clp->cl_delegations)) {
 		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-		unhash_delegation_locked(dp);
+		WARN_ON(!unhash_delegation_locked(dp));
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -1894,7 +1918,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
 	 * __force to keep sparse happy
 	 */
 	verf[0] = (__force __be32)get_seconds();
-	verf[1] = (__force __be32)nn->clientid_counter;
+	verf[1] = (__force __be32)nn->clverifier_counter++;
 	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
 }
 
@@ -2241,6 +2265,9 @@ static bool client_has_state(struct nfs4_client *clp)
 	 * Also note we should probably be using this in 4.0 case too.
 	 */
 	return !list_empty(&clp->cl_openowners)
+#ifdef CONFIG_NFSD_PNFS
+		|| !list_empty(&clp->cl_lo_states)
+#endif
 		|| !list_empty(&clp->cl_delegations)
 		|| !list_empty(&clp->cl_sessions);
 }
@@ -2547,11 +2574,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			goto out_free_conn;
 		cs_slot = &conf->cl_cs_slot;
 		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
-		if (status == nfserr_replay_cache) {
-			status = nfsd4_replay_create_session(cr_ses, cs_slot);
-			goto out_free_conn;
-		} else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
-			status = nfserr_seq_misordered;
+		if (status) {
+			if (status == nfserr_replay_cache)
+				status = nfsd4_replay_create_session(cr_ses, cs_slot);
 			goto out_free_conn;
 		}
 	} else if (unconf) {
@@ -3041,10 +3066,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	unconf = find_unconfirmed_client_by_name(&clname, nn);
 	if (unconf)
 		unhash_client_locked(unconf);
-	if (conf && same_verf(&conf->cl_verifier, &clverifier))
+	if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
 		/* case 1: probable callback update */
 		copy_clid(new, conf);
-	else /* case 4 (new client) or cases 2, 3 (client reboot): */
+		gen_confirm(new, nn);
+	} else /* case 4 (new client) or cases 2, 3 (client reboot): */
 		gen_clid(new, nn);
 	new->cl_minorversion = 0;
 	gen_callback(new, setclid, rqstp);
@@ -3085,10 +3111,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 	/*
 	 * We try hard to give out unique clientid's, so if we get an
 	 * attempt to confirm the same clientid with a different cred,
-	 * there's a bug somewhere.  Let's charitably assume it's our
-	 * bug.
+	 * the client may be buggy; this should never happen.
+	 *
+	 * Nevertheless, RFC 7530 recommends INUSE for this case:
 	 */
-	status = nfserr_serverfault;
+	status = nfserr_clid_inuse;
 	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
 		goto out;
 	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
@@ -3315,7 +3342,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
 		hash_openowner(oo, clp, strhashval);
 		ret = oo;
 	} else
-		nfs4_free_openowner(&oo->oo_owner);
+		nfs4_free_stateowner(&oo->oo_owner);
+
 	spin_unlock(&clp->cl_lock);
 	return ret;
 }
@@ -3482,6 +3510,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
 {
 	struct nfs4_delegation *dp = cb_to_delegation(cb);
 
+	if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID)
+		return 1;
+
 	switch (task->tk_status) {
 	case 0:
 		return 1;
@@ -3885,12 +3916,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
 	return status;
 }
 
-static void
-nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
-{
-	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-}
-
 /* Should we give out recallable state?: */
 static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
 {
@@ -3923,7 +3948,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
 static int nfs4_setlease(struct nfs4_delegation *dp)
 {
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
-	struct file_lock *fl, *ret;
+	struct file_lock *fl;
 	struct file *filp;
 	int status = 0;
 
@@ -3934,10 +3959,10 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
 	if (!filp) {
 		/* We should always have a readable file here */
 		WARN_ON_ONCE(1);
+		locks_free_lock(fl);
 		return -EBADF;
 	}
 	fl->fl_file = filp;
-	ret = fl;
 	status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
 	if (fl)
 		locks_free_lock(fl);
@@ -4063,7 +4088,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
 		case NFS4_OPEN_CLAIM_FH:
 			/*
 			 * Let's not give out any delegations till everyone's
-			 * had the chance to reclaim theirs....
+			 * had the chance to reclaim theirs, *and* until
+			 * NLM locks have all been reclaimed:
 			 */
 			if (locks_in_grace(clp->net))
 				goto out_no_deleg;
@@ -4209,7 +4235,7 @@ out:
 	if (fp)
 		put_nfs4_file(fp);
 	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
-		nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
+		open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
 	/*
 	 * To finish the open response, we just need to set the rflags.
 	 */
@@ -4338,14 +4364,12 @@ nfs4_laundromat(struct nfsd_net *nn)
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
-			continue;
 		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
 			t = dp->dl_time - cutoff;
 			new_timeo = min(new_timeo, t);
 			break;
 		}
-		unhash_delegation_locked(dp);
+		WARN_ON(!unhash_delegation_locked(dp));
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -4440,7 +4464,7 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
 {
 	if (ONE_STATEID(stateid) && (flags & RD_STATE))
 		return nfs_ok;
-	else if (locks_in_grace(net)) {
+	else if (opens_in_grace(net)) {
 		/* Answer in remaining cases depends on existence of
 		 * conflicting state; so we must wait out the grace period. */
 		return nfserr_grace;
@@ -4459,7 +4483,7 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
 static inline int
 grace_disallows_io(struct net *net, struct inode *inode)
 {
-	return locks_in_grace(net) && mandatory_lock(inode);
+	return opens_in_grace(net) && mandatory_lock(inode);
 }
 
 /* Returns true iff a is later than b: */
@@ -4751,7 +4775,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		if (check_for_locks(stp->st_stid.sc_file,
 				    lockowner(stp->st_stateowner)))
 			break;
-		unhash_lock_stateid(stp);
+		WARN_ON(!unhash_lock_stateid(stp));
 		spin_unlock(&cl->cl_lock);
 		nfs4_put_stid(s);
 		ret = nfs_ok;
@@ -4967,20 +4991,23 @@ out:
 static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
 {
 	struct nfs4_client *clp = s->st_stid.sc_client;
+	bool unhashed;
 	LIST_HEAD(reaplist);
 
 	s->st_stid.sc_type = NFS4_CLOSED_STID;
 	spin_lock(&clp->cl_lock);
-	unhash_open_stateid(s, &reaplist);
+	unhashed = unhash_open_stateid(s, &reaplist);
 
 	if (clp->cl_minorversion) {
-		put_ol_stateid_locked(s, &reaplist);
+		if (unhashed)
+			put_ol_stateid_locked(s, &reaplist);
 		spin_unlock(&clp->cl_lock);
 		free_ol_stateid_reaplist(&reaplist);
 	} else {
 		spin_unlock(&clp->cl_lock);
 		free_ol_stateid_reaplist(&reaplist);
-		move_to_close_lru(s, clp->net);
+		if (unhashed)
+			move_to_close_lru(s, clp->net);
 	}
 }
 
@@ -5045,9 +5072,6 @@ out:
 	return status;
 }
 
-
-#define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
-
 static inline u64
 end_offset(u64 start, u64 len)
 {
@@ -5139,8 +5163,7 @@ nevermind:
 }
 
 static struct nfs4_lockowner *
-find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
-		struct nfs4_client *clp)
+find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
 {
 	unsigned int strhashval = ownerstr_hashval(owner);
 	struct nfs4_stateowner *so;
@@ -5158,13 +5181,12 @@ find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
 }
 
 static struct nfs4_lockowner *
-find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
-		struct nfs4_client *clp)
+find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner)
 {
 	struct nfs4_lockowner *lo;
 
 	spin_lock(&clp->cl_lock);
-	lo = find_lockowner_str_locked(clid, owner, clp);
+	lo = find_lockowner_str_locked(clp, owner);
 	spin_unlock(&clp->cl_lock);
 	return lo;
 }
@@ -5208,14 +5230,14 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
 	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
 	lo->lo_owner.so_ops = &lockowner_ops;
 	spin_lock(&clp->cl_lock);
-	ret = find_lockowner_str_locked(&clp->cl_clientid,
-			&lock->lk_new_owner, clp);
+	ret = find_lockowner_str_locked(clp, &lock->lk_new_owner);
 	if (ret == NULL) {
 		list_add(&lo->lo_owner.so_strhash,
 			 &clp->cl_ownerstr_hashtbl[strhashval]);
 		ret = lo;
 	} else
-		nfs4_free_lockowner(&lo->lo_owner);
+		nfs4_free_stateowner(&lo->lo_owner);
+
 	spin_unlock(&clp->cl_lock);
 	return ret;
 }
@@ -5298,8 +5320,8 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
 static int
 check_lock_length(u64 offset, u64 length)
 {
 	return ((length == 0) || ((length != NFS4_MAX_UINT64) &&
-		LOFF_OVERFLOW(offset, length)));
+		(length > ~offset)));
 }
 
 static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
@@ -5328,9 +5350,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
 	struct nfs4_lockowner *lo;
 	unsigned int strhashval;
 
-	lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
+	lo = find_lockowner_str(cl, &lock->lk_new_owner);
 	if (!lo) {
-		strhashval = ownerstr_hashval(&lock->v.new.owner);
+		strhashval = ownerstr_hashval(&lock->lk_new_owner);
 		lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
 		if (lo == NULL)
 			return nfserr_jukebox;
@@ -5391,7 +5413,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (lock->lk_is_new) {
 		if (nfsd4_has_session(cstate))
 			/* See rfc 5661 18.10.3: given clientid is ignored: */
-			memcpy(&lock->v.new.clientid,
+			memcpy(&lock->lk_new_clientid,
 				&cstate->session->se_client->cl_clientid,
 				sizeof(clientid_t));
 
@@ -5409,7 +5431,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		open_sop = openowner(open_stp->st_stateowner);
 		status = nfserr_bad_stateid;
 		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
-						&lock->v.new.clientid))
+						&lock->lk_new_clientid))
 			goto out;
 		status = lookup_or_create_lock_state(cstate, open_stp, lock,
 							&lock_stp, &new);
@@ -5603,8 +5625,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
 
-	lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
-				cstate->clp);
+	lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
 	if (lo)
 		file_lock->fl_owner = (fl_owner_t)lo;
 	file_lock->fl_pid = current->tgid;
@@ -6019,7 +6040,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
 
 static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
 				    struct list_head *collect,
-				    void (*func)(struct nfs4_ol_stateid *))
+				    bool (*func)(struct nfs4_ol_stateid *))
 {
 	struct nfs4_openowner *oop;
 	struct nfs4_ol_stateid *stp, *st_next;
@@ -6033,9 +6054,9 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
 			list_for_each_entry_safe(lst, lst_next,
 					&stp->st_locks, st_locks) {
 				if (func) {
-					func(lst);
-					nfsd_inject_add_lock_to_list(lst,
-								collect);
+					if (func(lst))
+						nfsd_inject_add_lock_to_list(lst,
+									collect);
 				}
 				++count;
 				/*
@@ -6305,7 +6326,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
 				continue;
 
 			atomic_inc(&clp->cl_refcount);
-			unhash_delegation_locked(dp);
+			WARN_ON(!unhash_delegation_locked(dp));
 			list_add(&dp->dl_recall_lru, victims);
 		}
 		++count;
@@ -6584,6 +6605,7 @@ nfs4_state_start_net(struct net *net)
 		return ret;
 	nn->boot_time = get_seconds();
 	nn->grace_ended = false;
+	nn->nfsd4_manager.block_opens = true;
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nfsd4_client_tracking_init(net);
 	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
@@ -6602,7 +6624,7 @@ nfs4_state_start(void)
 	ret = set_callback_cred();
 	if (ret)
6604 return -ENOMEM; 6626 return -ENOMEM;
6605 laundry_wq = create_singlethread_workqueue("nfsd4"); 6627 laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
6606 if (laundry_wq == NULL) { 6628 if (laundry_wq == NULL) {
6607 ret = -ENOMEM; 6629 ret = -ENOMEM;
6608 goto out_recovery; 6630 goto out_recovery;
@@ -6635,7 +6657,7 @@ nfs4_state_shutdown_net(struct net *net)
6635 spin_lock(&state_lock); 6657 spin_lock(&state_lock);
6636 list_for_each_safe(pos, next, &nn->del_recall_lru) { 6658 list_for_each_safe(pos, next, &nn->del_recall_lru) {
6637 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6659 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6638 unhash_delegation_locked(dp); 6660 WARN_ON(!unhash_delegation_locked(dp));
6639 list_add(&dp->dl_recall_lru, &reaplist); 6661 list_add(&dp->dl_recall_lru, &reaplist);
6640 } 6662 }
6641 spin_unlock(&state_lock); 6663 spin_unlock(&state_lock);
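
Note on the check_lock_length() hunk above: the open-coded test "length > ~offset" is the expanded body of the old LOFF_OVERFLOW() macro. In unsigned 64-bit arithmetic ~offset equals NFS4_MAX_UINT64 - offset, so the test fires exactly when offset + length would run past the offset space, with length == NFS4_MAX_UINT64 exempt because it means "lock to EOF". A self-contained user-space sketch of the identity (function and test names are ours, not the kernel's):

    #include <assert.h>
    #include <stdint.h>

    #define NFS4_MAX_UINT64 (~(uint64_t)0)

    /* True when the byte range starting at offset with this length
     * would wrap past NFS4_MAX_UINT64; length == NFS4_MAX_UINT64
     * means "to EOF" and is always accepted. */
    static int lock_range_overflows(uint64_t offset, uint64_t length)
    {
            return length != NFS4_MAX_UINT64 && length > ~offset;
    }

    int main(void)
    {
            assert(!lock_range_overflows(0, 123));
            assert(!lock_range_overflows(NFS4_MAX_UINT64 - 10, 10));
            assert(lock_range_overflows(NFS4_MAX_UINT64 - 10, 11));
            assert(!lock_range_overflows(5, NFS4_MAX_UINT64));
            return 0;
    }
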
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 75e0563c09d1..51c9e9ca39a4 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2140,6 +2140,27 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
2140 return nfsd4_encode_user(xdr, rqstp, ace->who_uid); 2140 return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
2141} 2141}
2142 2142
2143static inline __be32
2144nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type)
2145{
2146 __be32 *p;
2147
2148 if (layout_type) {
2149 p = xdr_reserve_space(xdr, 8);
2150 if (!p)
2151 return nfserr_resource;
2152 *p++ = cpu_to_be32(1);
2153 *p++ = cpu_to_be32(layout_type);
2154 } else {
2155 p = xdr_reserve_space(xdr, 4);
2156 if (!p)
2157 return nfserr_resource;
2158 *p++ = cpu_to_be32(0);
2159 }
2160
2161 return 0;
2162}
2163
2143#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ 2164#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
2144 FATTR4_WORD0_RDATTR_ERROR) 2165 FATTR4_WORD0_RDATTR_ERROR)
2145#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID 2166#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
@@ -2205,6 +2226,39 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
2205 return err; 2226 return err;
2206} 2227}
2207 2228
2229static __be32
2230nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
2231{
2232 __be32 *p;
2233
2234 if (bmval2) {
2235 p = xdr_reserve_space(xdr, 16);
2236 if (!p)
2237 goto out_resource;
2238 *p++ = cpu_to_be32(3);
2239 *p++ = cpu_to_be32(bmval0);
2240 *p++ = cpu_to_be32(bmval1);
2241 *p++ = cpu_to_be32(bmval2);
2242 } else if (bmval1) {
2243 p = xdr_reserve_space(xdr, 12);
2244 if (!p)
2245 goto out_resource;
2246 *p++ = cpu_to_be32(2);
2247 *p++ = cpu_to_be32(bmval0);
2248 *p++ = cpu_to_be32(bmval1);
2249 } else {
2250 p = xdr_reserve_space(xdr, 8);
2251 if (!p)
2252 goto out_resource;
2253 *p++ = cpu_to_be32(1);
2254 *p++ = cpu_to_be32(bmval0);
2255 }
2256
2257 return 0;
2258out_resource:
2259 return nfserr_resource;
2260}
2261
2208/* 2262/*
2209 * Note: @fhp can be NULL; in this case, we might have to compose the filehandle 2263 * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
2210 * ourselves. 2264 * ourselves.
@@ -2301,28 +2355,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
2301 } 2355 }
2302#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ 2356#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
2303 2357
2304 if (bmval2) { 2358 status = nfsd4_encode_bitmap(xdr, bmval0, bmval1, bmval2);
2305 p = xdr_reserve_space(xdr, 16); 2359 if (status)
2306 if (!p) 2360 goto out;
2307 goto out_resource;
2308 *p++ = cpu_to_be32(3);
2309 *p++ = cpu_to_be32(bmval0);
2310 *p++ = cpu_to_be32(bmval1);
2311 *p++ = cpu_to_be32(bmval2);
2312 } else if (bmval1) {
2313 p = xdr_reserve_space(xdr, 12);
2314 if (!p)
2315 goto out_resource;
2316 *p++ = cpu_to_be32(2);
2317 *p++ = cpu_to_be32(bmval0);
2318 *p++ = cpu_to_be32(bmval1);
2319 } else {
2320 p = xdr_reserve_space(xdr, 8);
2321 if (!p)
2322 goto out_resource;
2323 *p++ = cpu_to_be32(1);
2324 *p++ = cpu_to_be32(bmval0);
2325 }
2326 2361
2327 attrlen_offset = xdr->buf->len; 2362 attrlen_offset = xdr->buf->len;
2328 p = xdr_reserve_space(xdr, 4); 2363 p = xdr_reserve_space(xdr, 4);
@@ -2675,6 +2710,9 @@ out_acl:
2675 *p++ = cpu_to_be32(stat.mtime.tv_nsec); 2710 *p++ = cpu_to_be32(stat.mtime.tv_nsec);
2676 } 2711 }
2677 if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { 2712 if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
2713 struct kstat parent_stat;
2714 u64 ino = stat.ino;
2715
2678 p = xdr_reserve_space(xdr, 8); 2716 p = xdr_reserve_space(xdr, 8);
2679 if (!p) 2717 if (!p)
2680 goto out_resource; 2718 goto out_resource;
@@ -2683,25 +2721,25 @@ out_acl:
2683 * and this is the root of a cross-mounted filesystem. 2721 * and this is the root of a cross-mounted filesystem.
2684 */ 2722 */
2685 if (ignore_crossmnt == 0 && 2723 if (ignore_crossmnt == 0 &&
2686 dentry == exp->ex_path.mnt->mnt_root) 2724 dentry == exp->ex_path.mnt->mnt_root) {
2687 get_parent_attributes(exp, &stat); 2725 err = get_parent_attributes(exp, &parent_stat);
2688 p = xdr_encode_hyper(p, stat.ino); 2726 if (err)
2727 goto out_nfserr;
2728 ino = parent_stat.ino;
2729 }
2730 p = xdr_encode_hyper(p, ino);
2689 } 2731 }
2690#ifdef CONFIG_NFSD_PNFS 2732#ifdef CONFIG_NFSD_PNFS
2691 if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || 2733 if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
2692 (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { 2734 status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
2693 if (exp->ex_layout_type) { 2735 if (status)
2694 p = xdr_reserve_space(xdr, 8); 2736 goto out;
2695 if (!p) 2737 }
2696 goto out_resource; 2738
2697 *p++ = cpu_to_be32(1); 2739 if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
2698 *p++ = cpu_to_be32(exp->ex_layout_type); 2740 status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
2699 } else { 2741 if (status)
2700 p = xdr_reserve_space(xdr, 4); 2742 goto out;
2701 if (!p)
2702 goto out_resource;
2703 *p++ = cpu_to_be32(0);
2704 }
2705 } 2743 }
2706 2744
2707 if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { 2745 if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
@@ -2711,21 +2749,20 @@ out_acl:
2711 *p++ = cpu_to_be32(stat.blksize); 2749 *p++ = cpu_to_be32(stat.blksize);
2712 } 2750 }
2713#endif /* CONFIG_NFSD_PNFS */ 2751#endif /* CONFIG_NFSD_PNFS */
2752 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
2753 status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0,
2754 NFSD_SUPPATTR_EXCLCREAT_WORD1,
2755 NFSD_SUPPATTR_EXCLCREAT_WORD2);
2756 if (status)
2757 goto out;
2758 }
2759
2714 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { 2760 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
2715 status = nfsd4_encode_security_label(xdr, rqstp, context, 2761 status = nfsd4_encode_security_label(xdr, rqstp, context,
2716 contextlen); 2762 contextlen);
2717 if (status) 2763 if (status)
2718 goto out; 2764 goto out;
2719 } 2765 }
2720 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
2721 p = xdr_reserve_space(xdr, 16);
2722 if (!p)
2723 goto out_resource;
2724 *p++ = cpu_to_be32(3);
2725 *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
2726 *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
2727 *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
2728 }
2729 2766
2730 attrlen = htonl(xdr->buf->len - attrlen_offset - 4); 2767 attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
2731 write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); 2768 write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
@@ -3044,13 +3081,12 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
3044 __be32 *p; 3081 __be32 *p;
3045 3082
3046 if (!nfserr) { 3083 if (!nfserr) {
3047 p = xdr_reserve_space(xdr, 32); 3084 p = xdr_reserve_space(xdr, 20);
3048 if (!p) 3085 if (!p)
3049 return nfserr_resource; 3086 return nfserr_resource;
3050 p = encode_cinfo(p, &create->cr_cinfo); 3087 encode_cinfo(p, &create->cr_cinfo);
3051 *p++ = cpu_to_be32(2); 3088 nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
3052 *p++ = cpu_to_be32(create->cr_bmval[0]); 3089 create->cr_bmval[1], create->cr_bmval[2]);
3053 *p++ = cpu_to_be32(create->cr_bmval[1]);
3054 } 3090 }
3055 return nfserr; 3091 return nfserr;
3056} 3092}
@@ -3190,16 +3226,22 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
3190 nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); 3226 nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
3191 if (nfserr) 3227 if (nfserr)
3192 goto out; 3228 goto out;
3193 p = xdr_reserve_space(xdr, 40); 3229 p = xdr_reserve_space(xdr, 24);
3194 if (!p) 3230 if (!p)
3195 return nfserr_resource; 3231 return nfserr_resource;
3196 p = encode_cinfo(p, &open->op_cinfo); 3232 p = encode_cinfo(p, &open->op_cinfo);
3197 *p++ = cpu_to_be32(open->op_rflags); 3233 *p++ = cpu_to_be32(open->op_rflags);
3198 *p++ = cpu_to_be32(2);
3199 *p++ = cpu_to_be32(open->op_bmval[0]);
3200 *p++ = cpu_to_be32(open->op_bmval[1]);
3201 *p++ = cpu_to_be32(open->op_delegate_type);
3202 3234
3235 nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
3236 open->op_bmval[2]);
3237 if (nfserr)
3238 goto out;
3239
3240 p = xdr_reserve_space(xdr, 4);
3241 if (!p)
3242 return nfserr_resource;
3243
3244 *p++ = cpu_to_be32(open->op_delegate_type);
3203 switch (open->op_delegate_type) { 3245 switch (open->op_delegate_type) {
3204 case NFS4_OPEN_DELEGATE_NONE: 3246 case NFS4_OPEN_DELEGATE_NONE:
3205 break; 3247 break;
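
The new nfsd4_encode_bitmap() helper above centralizes an XDR pattern that was open-coded in nfsd4_encode_fattr(), nfsd4_encode_create() and nfsd4_encode_open(): a bitmap4 is a 32-bit word count followed by that many mask words, and trailing all-zero words are simply not sent. A user-space sketch of the layout (htonl stands in for cpu_to_be32; the function name is ours):

    #include <arpa/inet.h>
    #include <stdint.h>

    /* Emit a bitmap4: word count, then the mask words, with trailing
     * all-zero words omitted.  Returns the number of words written. */
    static int encode_bitmap4(uint32_t *p, uint32_t b0, uint32_t b1,
                              uint32_t b2)
    {
            uint32_t vals[3] = { b0, b1, b2 };
            int n = b2 ? 3 : (b1 ? 2 : 1);
            int i;

            *p++ = htonl(n);
            for (i = 0; i < n; i++)
                    *p++ = htonl(vals[i]);
            return n + 1;
    }
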
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9277cc91c21b..ad4e2377dd63 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -391,6 +391,14 @@ static int nfsd_get_default_max_blksize(void)
391 return ret; 391 return ret;
392} 392}
393 393
394static struct svc_serv_ops nfsd_thread_sv_ops = {
395 .svo_shutdown = nfsd_last_thread,
396 .svo_function = nfsd,
397 .svo_enqueue_xprt = svc_xprt_do_enqueue,
398 .svo_setup = svc_set_num_threads,
399 .svo_module = THIS_MODULE,
400};
401
394int nfsd_create_serv(struct net *net) 402int nfsd_create_serv(struct net *net)
395{ 403{
396 int error; 404 int error;
@@ -405,7 +413,7 @@ int nfsd_create_serv(struct net *net)
405 nfsd_max_blksize = nfsd_get_default_max_blksize(); 413 nfsd_max_blksize = nfsd_get_default_max_blksize();
406 nfsd_reset_versions(); 414 nfsd_reset_versions();
407 nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 415 nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
408 nfsd_last_thread, nfsd, THIS_MODULE); 416 &nfsd_thread_sv_ops);
409 if (nn->nfsd_serv == NULL) 417 if (nn->nfsd_serv == NULL)
410 return -ENOMEM; 418 return -ENOMEM;
411 419
@@ -500,8 +508,8 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
500 /* apply the new numbers */ 508 /* apply the new numbers */
501 svc_get(nn->nfsd_serv); 509 svc_get(nn->nfsd_serv);
502 for (i = 0; i < n; i++) { 510 for (i = 0; i < n; i++) {
503 err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], 511 err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
504 nthreads[i]); 512 &nn->nfsd_serv->sv_pools[i], nthreads[i]);
505 if (err) 513 if (err)
506 break; 514 break;
507 } 515 }
@@ -540,7 +548,8 @@ nfsd_svc(int nrservs, struct net *net)
540 error = nfsd_startup_net(nrservs, net); 548 error = nfsd_startup_net(nrservs, net);
541 if (error) 549 if (error)
542 goto out_destroy; 550 goto out_destroy;
543 error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); 551 error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
552 NULL, nrservs);
544 if (error) 553 if (error)
545 goto out_shutdown; 554 goto out_shutdown;
546 /* We are holding a reference to nn->nfsd_serv which 555 /* We are holding a reference to nn->nfsd_serv which
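
nfsd_thread_sv_ops above shows the shape every sunrpc service now presents: the per-thread entry point, the enqueue hook, and the thread-count setup routine all move behind one ops table, so nfsd_svc() and nfsd_set_nrthreads() call through sv_ops instead of hard-coding svc_set_num_threads(). A minimal sketch of another service wired the same way (my_service_thread and my_program are hypothetical names, not part of this patch):

    /* Runs in each service thread; mirrors what nfsd() does in outline. */
    static int my_service_thread(void *data)
    {
            struct svc_rqst *rqstp = data;

            /* loop on svc_recv()/svc_process() until told to stop ... */
            svc_exit_thread(rqstp);
            return 0;
    }

    static struct svc_serv_ops my_sv_ops = {
            /* .svo_shutdown is optional; svc_shutdown_net() checks it */
            .svo_function     = my_service_thread,
            .svo_enqueue_xprt = svc_xprt_do_enqueue,
            .svo_setup        = svc_set_num_threads,
            .svo_module       = THIS_MODULE,
    };

    /* serv = svc_create_pooled(&my_program, bufsize, &my_sv_ops); */
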
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4874ce515fc1..583ffc13cae2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -67,8 +67,8 @@ struct nfsd4_callback {
67 struct rpc_message cb_msg; 67 struct rpc_message cb_msg;
68 struct nfsd4_callback_ops *cb_ops; 68 struct nfsd4_callback_ops *cb_ops;
69 struct work_struct cb_work; 69 struct work_struct cb_work;
70 int cb_seq_status;
70 int cb_status; 71 int cb_status;
71 bool cb_update_seq_nr;
72 bool cb_need_restart; 72 bool cb_need_restart;
73}; 73};
74 74
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b5e077a6e7d4..45c04979e7b3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1249,12 +1249,6 @@ out_nfserr:
1249 1249
1250#ifdef CONFIG_NFSD_V3 1250#ifdef CONFIG_NFSD_V3
1251 1251
1252static inline int nfsd_create_is_exclusive(int createmode)
1253{
1254 return createmode == NFS3_CREATE_EXCLUSIVE
1255 || createmode == NFS4_CREATE_EXCLUSIVE4_1;
1256}
1257
1258/* 1252/*
1259 * NFSv3 and NFSv4 version of nfsd_create 1253 * NFSv3 and NFSv4 version of nfsd_create
1260 */ 1254 */
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 5be875e3e638..fee2451ae248 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -131,4 +131,10 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
131 return nfserrno(vfs_getattr(&p, stat)); 131 return nfserrno(vfs_getattr(&p, stat));
132} 132}
133 133
134static inline int nfsd_create_is_exclusive(int createmode)
135{
136 return createmode == NFS3_CREATE_EXCLUSIVE
137 || createmode == NFS4_CREATE_EXCLUSIVE4_1;
138}
139
134#endif /* LINUX_NFSD_VFS_H */ 140#endif /* LINUX_NFSD_VFS_H */
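
Moving nfsd_create_is_exclusive() from vfs.c into vfs.h (the two hunks above) makes the predicate visible outside vfs.c, presumably so the NFSv4 open path can share it. The point of the test: both exclusive create modes carry a client verifier instead of settable attributes, which changes what a create path must do with the new inode. A hedged sketch of such a caller (the helper names are hypothetical):

    static void stash_verifier(struct inode *inode, u32 *verf);         /* hypothetical */
    static void apply_attrs(struct inode *inode, struct iattr *attrs);  /* hypothetical */

    static void finish_create(struct inode *inode, int createmode,
                              u32 *verf, struct iattr *attrs)
    {
            if (nfsd_create_is_exclusive(createmode))
                    stash_verifier(inode, verf);   /* classically kept in atime+mtime */
            else
                    apply_attrs(inode, attrs);
    }
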
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 864203c10dbc..dc634a55163b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -943,12 +943,18 @@ struct lock_manager_operations {
943 943
944struct lock_manager { 944struct lock_manager {
945 struct list_head list; 945 struct list_head list;
946 /*
947 * NFSv4 and up also want opens blocked during the grace period;
948 * NLM doesn't care:
949 */
950 bool block_opens;
946}; 951};
947 952
948struct net; 953struct net;
949void locks_start_grace(struct net *, struct lock_manager *); 954void locks_start_grace(struct net *, struct lock_manager *);
950void locks_end_grace(struct lock_manager *); 955void locks_end_grace(struct lock_manager *);
951int locks_in_grace(struct net *); 956int locks_in_grace(struct net *);
957int opens_in_grace(struct net *);
952 958
953/* that will die - we need it for nfs_lock_info */ 959/* that will die - we need it for nfs_lock_info */
954#include <linux/nfs_fs_i.h> 960#include <linux/nfs_fs_i.h>
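
The block_opens flag and opens_in_grace() above split the grace-period question in two: every active lock manager blocks lock reclaims, but only managers that set block_opens (NFSv4, per the comment; NLM does not care) also block non-reclaim opens. A sketch of the intended usage, assuming opens_in_grace() reports a grace period only when some registered manager has block_opens set:

    /* NLM keeps opens flowing during its grace period ... */
    static struct lock_manager nlm_manager   = { .block_opens = false };
    /* ... while the NFSv4 server blocks them (see nfs4_state_start_net
     * earlier in this patch). */
    static struct lock_manager nfsd4_manager = { .block_opens = true };

    /* In an NFSv4 OPEN handler, sketched (claim_is_reclaim is assumed):
     *
     *      if (opens_in_grace(net) && !claim_is_reclaim(open))
     *              return nfserr_grace;
     *
     * locks_in_grace(net) keeps its old meaning for the LOCK paths.
     */
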
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 437ddb6c4aef..03d3b4c92d9f 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -46,7 +46,7 @@
46 * 46 *
47 */ 47 */
48struct cache_head { 48struct cache_head {
49 struct cache_head * next; 49 struct hlist_node cache_list;
50 time_t expiry_time; /* After time time, don't use the data */ 50 time_t expiry_time; /* After time time, don't use the data */
 50	time_t		expiry_time;	/* After this time, don't use the data */ 50	time_t		expiry_time;	/* After this time, don't use the data */
51 time_t last_refresh; /* If CACHE_PENDING, this is when upcall 51 time_t last_refresh; /* If CACHE_PENDING, this is when upcall
52 * was sent, else this is when update was received 52 * was sent, else this is when update was received
@@ -73,7 +73,7 @@ struct cache_detail_pipefs {
73struct cache_detail { 73struct cache_detail {
74 struct module * owner; 74 struct module * owner;
75 int hash_size; 75 int hash_size;
76 struct cache_head ** hash_table; 76 struct hlist_head * hash_table;
77 rwlock_t hash_lock; 77 rwlock_t hash_lock;
78 78
79 atomic_t inuse; /* active user-space update or lookup */ 79 atomic_t inuse; /* active user-space update or lookup */
@@ -224,6 +224,11 @@ extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
224 umode_t, struct cache_detail *); 224 umode_t, struct cache_detail *);
225extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); 225extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
226 226
227/* Must store cache_detail in seq_file->private if using next three functions */
228extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
229extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos);
230extern void cache_seq_stop(struct seq_file *file, void *p);
231
227extern void qword_add(char **bpp, int *lp, char *str); 232extern void qword_add(char **bpp, int *lp, char *str);
228extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); 233extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);
229extern int qword_get(char **bpp, char *dest, int bufsize); 234extern int qword_get(char **bpp, char *dest, int bufsize);
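
The three cache_seq_* iterators declared above are the old static c_start/c_next/c_stop from net/sunrpc/cache.c made public, and the header comment is the whole contract: seq_file->private must already point at the cache_detail when they run. A sketch of a content file reusing them (my_cache_show and my_cd are hypothetical):

    static int my_cache_show(struct seq_file *m, void *p);   /* hypothetical */
    static struct cache_detail *my_cd;                       /* hypothetical */

    static const struct seq_operations my_cache_seq_ops = {
            .start = cache_seq_start,
            .next  = cache_seq_next,
            .stop  = cache_seq_stop,
            .show  = my_cache_show,
    };

    static int my_cache_open(struct inode *inode, struct file *file)
    {
            int err = seq_open(file, &my_cache_seq_ops);

            if (!err)   /* honor the contract before any read runs */
                    ((struct seq_file *)file->private_data)->private = my_cd;
            return err;
    }
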
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index fae6fb947fc8..cc0fc712bb82 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -19,11 +19,6 @@
19#include <linux/wait.h> 19#include <linux/wait.h>
20#include <linux/mm.h> 20#include <linux/mm.h>
21 21
22/*
23 * This is the RPC server thread function prototype
24 */
25typedef int (*svc_thread_fn)(void *);
26
27/* statistics for svc_pool structures */ 22/* statistics for svc_pool structures */
28struct svc_pool_stats { 23struct svc_pool_stats {
29 atomic_long_t packets; 24 atomic_long_t packets;
@@ -54,6 +49,25 @@ struct svc_pool {
54 unsigned long sp_flags; 49 unsigned long sp_flags;
55} ____cacheline_aligned_in_smp; 50} ____cacheline_aligned_in_smp;
56 51
52struct svc_serv;
53
54struct svc_serv_ops {
55 /* Callback to use when last thread exits. */
56 void (*svo_shutdown)(struct svc_serv *, struct net *);
57
58 /* function for service threads to run */
59 int (*svo_function)(void *);
60
61 /* queue up a transport for servicing */
62 void (*svo_enqueue_xprt)(struct svc_xprt *);
63
64 /* set up thread (or whatever) execution context */
65 int (*svo_setup)(struct svc_serv *, struct svc_pool *, int);
66
67 /* optional module to count when adding threads (pooled svcs only) */
68 struct module *svo_module;
69};
70
57/* 71/*
58 * RPC service. 72 * RPC service.
59 * 73 *
@@ -85,16 +99,7 @@ struct svc_serv {
85 99
86 unsigned int sv_nrpools; /* number of thread pools */ 100 unsigned int sv_nrpools; /* number of thread pools */
87 struct svc_pool * sv_pools; /* array of thread pools */ 101 struct svc_pool * sv_pools; /* array of thread pools */
88 102 struct svc_serv_ops *sv_ops; /* server operations */
89 void (*sv_shutdown)(struct svc_serv *serv,
90 struct net *net);
91 /* Callback to use when last thread
92 * exits.
93 */
94
95 struct module * sv_module; /* optional module to count when
96 * adding threads */
97 svc_thread_fn sv_function; /* main function for threads */
98#if defined(CONFIG_SUNRPC_BACKCHANNEL) 103#if defined(CONFIG_SUNRPC_BACKCHANNEL)
99 struct list_head sv_cb_list; /* queue for callback requests 104 struct list_head sv_cb_list; /* queue for callback requests
100 * that arrive over the same 105 * that arrive over the same
@@ -423,19 +428,46 @@ struct svc_procedure {
423}; 428};
424 429
425/* 430/*
431 * Mode for mapping cpus to pools.
432 */
433enum {
434 SVC_POOL_AUTO = -1, /* choose one of the others */
435 SVC_POOL_GLOBAL, /* no mapping, just a single global pool
436 * (legacy & UP mode) */
437 SVC_POOL_PERCPU, /* one pool per cpu */
438 SVC_POOL_PERNODE /* one pool per numa node */
439};
440
441struct svc_pool_map {
442 int count; /* How many svc_servs use us */
443 int mode; /* Note: int not enum to avoid
444 * warnings about "enumeration value
445 * not handled in switch" */
446 unsigned int npools;
447 unsigned int *pool_to; /* maps pool id to cpu or node */
448 unsigned int *to_pool; /* maps cpu or node to pool id */
449};
450
451extern struct svc_pool_map svc_pool_map;
452
453/*
426 * Function prototypes. 454 * Function prototypes.
427 */ 455 */
428int svc_rpcb_setup(struct svc_serv *serv, struct net *net); 456int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
429void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); 457void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
430int svc_bind(struct svc_serv *serv, struct net *net); 458int svc_bind(struct svc_serv *serv, struct net *net);
431struct svc_serv *svc_create(struct svc_program *, unsigned int, 459struct svc_serv *svc_create(struct svc_program *, unsigned int,
432 void (*shutdown)(struct svc_serv *, struct net *net)); 460 struct svc_serv_ops *);
461struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
462 struct svc_pool *pool, int node);
433struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, 463struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
434 struct svc_pool *pool, int node); 464 struct svc_pool *pool, int node);
465void svc_rqst_free(struct svc_rqst *);
435void svc_exit_thread(struct svc_rqst *); 466void svc_exit_thread(struct svc_rqst *);
467unsigned int svc_pool_map_get(void);
468void svc_pool_map_put(void);
436struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, 469struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
437 void (*shutdown)(struct svc_serv *, struct net *net), 470 struct svc_serv_ops *);
438 svc_thread_fn, struct module *);
439int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); 471int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
440int svc_pool_stats_open(struct svc_serv *serv, struct file *file); 472int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
441void svc_destroy(struct svc_serv *); 473void svc_destroy(struct svc_serv *);
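
Two of the additions above prepare for callers that need server infrastructure without the usual thread bookkeeping: svc_rqst_alloc()/svc_rqst_free() build and tear down a request without touching sv_nrthreads or the pool's thread list (svc_prepare_thread() becomes a thin wrapper that adds the accounting), and svc_pool_map with its get/put helpers becomes visible outside svc.c. A sketch of the unaccounted usage:

    /* Sketch: a svc_rqst that never appears in pool thread counts,
     * e.g. for driving requests outside the normal kthread lifecycle. */
    static int use_private_rqst(struct svc_serv *serv)
    {
            struct svc_rqst *rqstp;

            rqstp = svc_rqst_alloc(serv, &serv->sv_pools[0], NUMA_NO_NODE);
            if (!rqstp)
                    return -ENOMEM;   /* returns NULL on failure, not ERR_PTR */
            /* ... issue or process requests with rqstp ... */
            svc_rqst_free(rqstp);
            return 0;
    }
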
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index cb94ee4181d4..d5ee6d8b7c58 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -172,13 +172,6 @@ struct svcxprt_rdma {
172#define RDMAXPRT_SQ_PENDING 2 172#define RDMAXPRT_SQ_PENDING 2
173#define RDMAXPRT_CONN_PENDING 3 173#define RDMAXPRT_CONN_PENDING 3
174 174
175#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */
176#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
177#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD
178#else
179#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
180#endif
181
182#define RPCRDMA_LISTEN_BACKLOG 10 175#define RPCRDMA_LISTEN_BACKLOG 10
183/* The default ORD value is based on two outstanding full-size writes with a 176/* The default ORD value is based on two outstanding full-size writes with a
184 * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ 177 * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
@@ -187,6 +180,8 @@ struct svcxprt_rdma {
187#define RPCRDMA_MAX_REQUESTS 32 180#define RPCRDMA_MAX_REQUESTS 32
188#define RPCRDMA_MAX_REQ_SIZE 4096 181#define RPCRDMA_MAX_REQ_SIZE 4096
189 182
183#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
184
190/* svc_rdma_marshal.c */ 185/* svc_rdma_marshal.c */
191extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); 186extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
192extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, 187extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -213,6 +208,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
213 208
214/* svc_rdma_sendto.c */ 209/* svc_rdma_sendto.c */
215extern int svc_rdma_sendto(struct svc_rqst *); 210extern int svc_rdma_sendto(struct svc_rqst *);
211extern struct rpcrdma_read_chunk *
212 svc_rdma_get_read_chunk(struct rpcrdma_msg *);
216 213
217/* svc_rdma_transport.c */ 214/* svc_rdma_transport.c */
218extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); 215extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
@@ -225,7 +222,6 @@ extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
225extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); 222extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
226extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); 223extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
227extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); 224extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
228extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
229extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); 225extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
230extern void svc_rdma_put_frmr(struct svcxprt_rdma *, 226extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
231 struct svc_rdma_fastreg_mr *); 227 struct svc_rdma_fastreg_mr *);
@@ -238,83 +234,4 @@ extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
238extern int svc_rdma_init(void); 234extern int svc_rdma_init(void);
239extern void svc_rdma_cleanup(void); 235extern void svc_rdma_cleanup(void);
240 236
241/*
242 * Returns the address of the first read chunk or <nul> if no read chunk is
243 * present
244 */
245static inline struct rpcrdma_read_chunk *
246svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
247{
248 struct rpcrdma_read_chunk *ch =
249 (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
250
251 if (ch->rc_discrim == 0)
252 return NULL;
253
254 return ch;
255}
256
257/*
258 * Returns the address of the first read write array element or <nul> if no
259 * write array list is present
260 */
261static inline struct rpcrdma_write_array *
262svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
263{
264 if (rmsgp->rm_body.rm_chunks[0] != 0
265 || rmsgp->rm_body.rm_chunks[1] == 0)
266 return NULL;
267
268 return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
269}
270
271/*
272 * Returns the address of the first reply array element or <nul> if no
273 * reply array is present
274 */
275static inline struct rpcrdma_write_array *
276svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
277{
278 struct rpcrdma_read_chunk *rch;
279 struct rpcrdma_write_array *wr_ary;
280 struct rpcrdma_write_array *rp_ary;
281
282 /* XXX: Need to fix when reply list may occur with read-list and/or
283 * write list */
284 if (rmsgp->rm_body.rm_chunks[0] != 0 ||
285 rmsgp->rm_body.rm_chunks[1] != 0)
286 return NULL;
287
288 rch = svc_rdma_get_read_chunk(rmsgp);
289 if (rch) {
290 while (rch->rc_discrim)
291 rch++;
292
293 /* The reply list follows an empty write array located
294 * at 'rc_position' here. The reply array is at rc_target.
295 */
296 rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
297
298 goto found_it;
299 }
300
301 wr_ary = svc_rdma_get_write_array(rmsgp);
302 if (wr_ary) {
303 rp_ary = (struct rpcrdma_write_array *)
304 &wr_ary->
305 wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length;
306
307 goto found_it;
308 }
309
310 /* No read list, no write list */
311 rp_ary = (struct rpcrdma_write_array *)
312 &rmsgp->rm_body.rm_chunks[2];
313
314 found_it:
315 if (rp_ary->wc_discrim == 0)
316 return NULL;
317
318 return rp_ary;
319}
320#endif 237#endif
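
The inline chunk-list helpers removed above move into net/sunrpc/xprtrdma/svc_rdma_sendto.c, with svc_rdma_get_read_chunk() re-exported through the extern declared earlier in this header. The wire rule they encode is worth keeping in mind: each list in rm_body.rm_chunks[] (read, write, reply) opens with a one-word XDR discriminator, and a zero discriminator means the list is absent. Reduced to a sketch:

    /* Zero rc_discrim == "no read list"; this is the same test the
     * moved helper performs. */
    static bool has_read_list(struct rpcrdma_msg *rmsgp)
    {
            struct rpcrdma_read_chunk *ch =
                    (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];

            return ch->rc_discrim != 0;
    }
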
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 79f6f8f3dc0a..78512cfe1fe6 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -116,6 +116,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
116 struct svc_serv *); 116 struct svc_serv *);
117int svc_create_xprt(struct svc_serv *, const char *, struct net *, 117int svc_create_xprt(struct svc_serv *, const char *, struct net *,
118 const int, const unsigned short, int); 118 const int, const unsigned short, int);
119void svc_xprt_do_enqueue(struct svc_xprt *xprt);
119void svc_xprt_enqueue(struct svc_xprt *xprt); 120void svc_xprt_enqueue(struct svc_xprt *xprt);
120void svc_xprt_put(struct svc_xprt *xprt); 121void svc_xprt_put(struct svc_xprt *xprt);
121void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); 122void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index fd1a02cb3c82..003dca933803 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -529,18 +529,21 @@ TRACE_EVENT(svc_xprt_do_enqueue,
529 529
530 TP_STRUCT__entry( 530 TP_STRUCT__entry(
531 __field(struct svc_xprt *, xprt) 531 __field(struct svc_xprt *, xprt)
532 __field(struct svc_rqst *, rqst) 532 __field_struct(struct sockaddr_storage, ss)
533 __field(int, pid)
534 __field(unsigned long, flags)
533 ), 535 ),
534 536
535 TP_fast_assign( 537 TP_fast_assign(
536 __entry->xprt = xprt; 538 __entry->xprt = xprt;
537 __entry->rqst = rqst; 539 xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
540 __entry->pid = rqst? rqst->rq_task->pid : 0;
541 __entry->flags = xprt ? xprt->xpt_flags : 0;
538 ), 542 ),
539 543
540 TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, 544 TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
541 (struct sockaddr *)&__entry->xprt->xpt_remote, 545 (struct sockaddr *)&__entry->ss,
542 __entry->rqst ? __entry->rqst->rq_task->pid : 0, 546 __entry->pid, show_svc_xprt_flags(__entry->flags))
543 show_svc_xprt_flags(__entry->xprt->xpt_flags))
544); 547);
545 548
546TRACE_EVENT(svc_xprt_dequeue, 549TRACE_EVENT(svc_xprt_dequeue,
@@ -589,16 +592,20 @@ TRACE_EVENT(svc_handle_xprt,
589 TP_STRUCT__entry( 592 TP_STRUCT__entry(
590 __field(struct svc_xprt *, xprt) 593 __field(struct svc_xprt *, xprt)
591 __field(int, len) 594 __field(int, len)
595 __field_struct(struct sockaddr_storage, ss)
596 __field(unsigned long, flags)
592 ), 597 ),
593 598
594 TP_fast_assign( 599 TP_fast_assign(
595 __entry->xprt = xprt; 600 __entry->xprt = xprt;
601 xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
596 __entry->len = len; 602 __entry->len = len;
603 __entry->flags = xprt ? xprt->xpt_flags : 0;
597 ), 604 ),
598 605
599 TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, 606 TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
600 (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len, 607 (struct sockaddr *)&__entry->ss,
601 show_svc_xprt_flags(__entry->xprt->xpt_flags)) 608 __entry->len, show_svc_xprt_flags(__entry->flags))
602); 609);
603#endif /* _TRACE_SUNRPC_H */ 610#endif /* _TRACE_SUNRPC_H */
604 611
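
Both tracepoint changes above follow one rule: copy what will be printed into the ring-buffer entry at TP_fast_assign time (__field_struct for the peer address, plain fields for pid and flags) instead of dereferencing __entry->xprt at TP_printk time, when the transport may already have been freed; the xprt pointer survives only as an identifying cookie. The same idea outside the TRACE_EVENT macros, as a sketch:

    struct xprt_snapshot {
            struct sockaddr_storage ss;   /* copied, never pointed to */
            unsigned long flags;
    };

    static void snapshot_xprt(struct xprt_snapshot *e, struct svc_xprt *xprt)
    {
            if (xprt) {
                    memcpy(&e->ss, &xprt->xpt_remote, sizeof(e->ss));
                    e->flags = xprt->xpt_flags;
            } else {
                    memset(&e->ss, 0, sizeof(e->ss));
                    e->flags = 0;
            }
            /* later printing reads only e->ss and e->flags */
    }
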
diff --git a/include/uapi/linux/nfsacl.h b/include/uapi/linux/nfsacl.h
index 9bb9771a107f..552726631162 100644
--- a/include/uapi/linux/nfsacl.h
+++ b/include/uapi/linux/nfsacl.h
@@ -22,6 +22,7 @@
22#define NFS_ACLCNT 0x0002 22#define NFS_ACLCNT 0x0002
23#define NFS_DFACL 0x0004 23#define NFS_DFACL 0x0004
24#define NFS_DFACLCNT 0x0008 24#define NFS_DFACLCNT 0x0008
25#define NFS_ACL_MASK 0x000f
25 26
26/* Flag for Default ACL entries */ 27/* Flag for Default ACL entries */
27#define NFS_ACL_DEFAULT 0x1000 28#define NFS_ACL_DEFAULT 0x1000
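
NFS_ACL_MASK is simply the union of the four request bits above (0x1|0x2|0x4|0x8 == 0x000f), giving decoders a single mask-validity test. A sketch of the kind of check it enables (the caller is an assumption, not code shown in this patch):

    static bool nfsacl_mask_valid(u32 mask)
    {
            /* any bit outside ACL|ACLCNT|DFACL|DFACLCNT is invalid */
            return (mask & ~NFS_ACL_MASK) == 0;
    }
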
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2928afffbb81..4a2340a54401 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -44,7 +44,7 @@ static void cache_revisit_request(struct cache_head *item);
44static void cache_init(struct cache_head *h) 44static void cache_init(struct cache_head *h)
45{ 45{
46 time_t now = seconds_since_boot(); 46 time_t now = seconds_since_boot();
47 h->next = NULL; 47 INIT_HLIST_NODE(&h->cache_list);
48 h->flags = 0; 48 h->flags = 0;
49 kref_init(&h->ref); 49 kref_init(&h->ref);
50 h->expiry_time = now + CACHE_NEW_EXPIRY; 50 h->expiry_time = now + CACHE_NEW_EXPIRY;
@@ -54,15 +54,14 @@ static void cache_init(struct cache_head *h)
54struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, 54struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
55 struct cache_head *key, int hash) 55 struct cache_head *key, int hash)
56{ 56{
57 struct cache_head **head, **hp; 57 struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL;
58 struct cache_head *new = NULL, *freeme = NULL; 58 struct hlist_head *head;
59 59
60 head = &detail->hash_table[hash]; 60 head = &detail->hash_table[hash];
61 61
62 read_lock(&detail->hash_lock); 62 read_lock(&detail->hash_lock);
63 63
64 for (hp=head; *hp != NULL ; hp = &(*hp)->next) { 64 hlist_for_each_entry(tmp, head, cache_list) {
65 struct cache_head *tmp = *hp;
66 if (detail->match(tmp, key)) { 65 if (detail->match(tmp, key)) {
67 if (cache_is_expired(detail, tmp)) 66 if (cache_is_expired(detail, tmp))
68 /* This entry is expired, we will discard it. */ 67 /* This entry is expired, we will discard it. */
@@ -88,12 +87,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
88 write_lock(&detail->hash_lock); 87 write_lock(&detail->hash_lock);
89 88
90 /* check if entry appeared while we slept */ 89 /* check if entry appeared while we slept */
91 for (hp=head; *hp != NULL ; hp = &(*hp)->next) { 90 hlist_for_each_entry(tmp, head, cache_list) {
92 struct cache_head *tmp = *hp;
93 if (detail->match(tmp, key)) { 91 if (detail->match(tmp, key)) {
94 if (cache_is_expired(detail, tmp)) { 92 if (cache_is_expired(detail, tmp)) {
95 *hp = tmp->next; 93 hlist_del_init(&tmp->cache_list);
96 tmp->next = NULL;
97 detail->entries --; 94 detail->entries --;
98 freeme = tmp; 95 freeme = tmp;
99 break; 96 break;
@@ -104,8 +101,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
104 return tmp; 101 return tmp;
105 } 102 }
106 } 103 }
107 new->next = *head; 104
108 *head = new; 105 hlist_add_head(&new->cache_list, head);
109 detail->entries++; 106 detail->entries++;
110 cache_get(new); 107 cache_get(new);
111 write_unlock(&detail->hash_lock); 108 write_unlock(&detail->hash_lock);
@@ -143,7 +140,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
143 * If 'old' is not VALID, we update it directly, 140 * If 'old' is not VALID, we update it directly,
144 * otherwise we need to replace it 141 * otherwise we need to replace it
145 */ 142 */
146 struct cache_head **head;
147 struct cache_head *tmp; 143 struct cache_head *tmp;
148 144
149 if (!test_bit(CACHE_VALID, &old->flags)) { 145 if (!test_bit(CACHE_VALID, &old->flags)) {
@@ -168,15 +164,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
168 } 164 }
169 cache_init(tmp); 165 cache_init(tmp);
170 detail->init(tmp, old); 166 detail->init(tmp, old);
171 head = &detail->hash_table[hash];
172 167
173 write_lock(&detail->hash_lock); 168 write_lock(&detail->hash_lock);
174 if (test_bit(CACHE_NEGATIVE, &new->flags)) 169 if (test_bit(CACHE_NEGATIVE, &new->flags))
175 set_bit(CACHE_NEGATIVE, &tmp->flags); 170 set_bit(CACHE_NEGATIVE, &tmp->flags);
176 else 171 else
177 detail->update(tmp, new); 172 detail->update(tmp, new);
178 tmp->next = *head; 173 hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
179 *head = tmp;
180 detail->entries++; 174 detail->entries++;
181 cache_get(tmp); 175 cache_get(tmp);
182 cache_fresh_locked(tmp, new->expiry_time); 176 cache_fresh_locked(tmp, new->expiry_time);
@@ -416,28 +410,29 @@ static int cache_clean(void)
416 /* find a non-empty bucket in the table */ 410 /* find a non-empty bucket in the table */
417 while (current_detail && 411 while (current_detail &&
418 current_index < current_detail->hash_size && 412 current_index < current_detail->hash_size &&
419 current_detail->hash_table[current_index] == NULL) 413 hlist_empty(&current_detail->hash_table[current_index]))
420 current_index++; 414 current_index++;
421 415
422 /* find a cleanable entry in the bucket and clean it, or set to next bucket */ 416 /* find a cleanable entry in the bucket and clean it, or set to next bucket */
423 417
424 if (current_detail && current_index < current_detail->hash_size) { 418 if (current_detail && current_index < current_detail->hash_size) {
425 struct cache_head *ch, **cp; 419 struct cache_head *ch = NULL;
426 struct cache_detail *d; 420 struct cache_detail *d;
421 struct hlist_head *head;
422 struct hlist_node *tmp;
427 423
428 write_lock(&current_detail->hash_lock); 424 write_lock(&current_detail->hash_lock);
429 425
430 /* Ok, now to clean this strand */ 426 /* Ok, now to clean this strand */
431 427
432 cp = & current_detail->hash_table[current_index]; 428 head = &current_detail->hash_table[current_index];
433 for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { 429 hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
434 if (current_detail->nextcheck > ch->expiry_time) 430 if (current_detail->nextcheck > ch->expiry_time)
435 current_detail->nextcheck = ch->expiry_time+1; 431 current_detail->nextcheck = ch->expiry_time+1;
436 if (!cache_is_expired(current_detail, ch)) 432 if (!cache_is_expired(current_detail, ch))
437 continue; 433 continue;
438 434
439 *cp = ch->next; 435 hlist_del_init(&ch->cache_list);
440 ch->next = NULL;
441 current_detail->entries--; 436 current_detail->entries--;
442 rv = 1; 437 rv = 1;
443 break; 438 break;
@@ -1270,18 +1265,13 @@ EXPORT_SYMBOL_GPL(qword_get);
1270 * get a header, then pass each real item in the cache 1265 * get a header, then pass each real item in the cache
1271 */ 1266 */
1272 1267
1273struct handle { 1268void *cache_seq_start(struct seq_file *m, loff_t *pos)
1274 struct cache_detail *cd;
1275};
1276
1277static void *c_start(struct seq_file *m, loff_t *pos)
1278 __acquires(cd->hash_lock) 1269 __acquires(cd->hash_lock)
1279{ 1270{
1280 loff_t n = *pos; 1271 loff_t n = *pos;
1281 unsigned int hash, entry; 1272 unsigned int hash, entry;
1282 struct cache_head *ch; 1273 struct cache_head *ch;
1283 struct cache_detail *cd = ((struct handle*)m->private)->cd; 1274 struct cache_detail *cd = m->private;
1284
1285 1275
1286 read_lock(&cd->hash_lock); 1276 read_lock(&cd->hash_lock);
1287 if (!n--) 1277 if (!n--)
@@ -1289,7 +1279,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
1289 hash = n >> 32; 1279 hash = n >> 32;
1290 entry = n & ((1LL<<32) - 1); 1280 entry = n & ((1LL<<32) - 1);
1291 1281
1292 for (ch=cd->hash_table[hash]; ch; ch=ch->next) 1282 hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list)
1293 if (!entry--) 1283 if (!entry--)
1294 return ch; 1284 return ch;
1295 n &= ~((1LL<<32) - 1); 1285 n &= ~((1LL<<32) - 1);
@@ -1297,51 +1287,57 @@ static void *c_start(struct seq_file *m, loff_t *pos)
1297 hash++; 1287 hash++;
1298 n += 1LL<<32; 1288 n += 1LL<<32;
1299 } while(hash < cd->hash_size && 1289 } while(hash < cd->hash_size &&
1300 cd->hash_table[hash]==NULL); 1290 hlist_empty(&cd->hash_table[hash]));
1301 if (hash >= cd->hash_size) 1291 if (hash >= cd->hash_size)
1302 return NULL; 1292 return NULL;
1303 *pos = n+1; 1293 *pos = n+1;
1304 return cd->hash_table[hash]; 1294 return hlist_entry_safe(cd->hash_table[hash].first,
1295 struct cache_head, cache_list);
1305} 1296}
1297EXPORT_SYMBOL_GPL(cache_seq_start);
1306 1298
1307static void *c_next(struct seq_file *m, void *p, loff_t *pos) 1299void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
1308{ 1300{
1309 struct cache_head *ch = p; 1301 struct cache_head *ch = p;
1310 int hash = (*pos >> 32); 1302 int hash = (*pos >> 32);
1311 struct cache_detail *cd = ((struct handle*)m->private)->cd; 1303 struct cache_detail *cd = m->private;
1312 1304
1313 if (p == SEQ_START_TOKEN) 1305 if (p == SEQ_START_TOKEN)
1314 hash = 0; 1306 hash = 0;
1315 else if (ch->next == NULL) { 1307 else if (ch->cache_list.next == NULL) {
1316 hash++; 1308 hash++;
1317 *pos += 1LL<<32; 1309 *pos += 1LL<<32;
1318 } else { 1310 } else {
1319 ++*pos; 1311 ++*pos;
1320 return ch->next; 1312 return hlist_entry_safe(ch->cache_list.next,
1313 struct cache_head, cache_list);
1321 } 1314 }
1322 *pos &= ~((1LL<<32) - 1); 1315 *pos &= ~((1LL<<32) - 1);
1323 while (hash < cd->hash_size && 1316 while (hash < cd->hash_size &&
1324 cd->hash_table[hash] == NULL) { 1317 hlist_empty(&cd->hash_table[hash])) {
1325 hash++; 1318 hash++;
1326 *pos += 1LL<<32; 1319 *pos += 1LL<<32;
1327 } 1320 }
1328 if (hash >= cd->hash_size) 1321 if (hash >= cd->hash_size)
1329 return NULL; 1322 return NULL;
1330 ++*pos; 1323 ++*pos;
1331 return cd->hash_table[hash]; 1324 return hlist_entry_safe(cd->hash_table[hash].first,
1325 struct cache_head, cache_list);
1332} 1326}
1327EXPORT_SYMBOL_GPL(cache_seq_next);
1333 1328
1334static void c_stop(struct seq_file *m, void *p) 1329void cache_seq_stop(struct seq_file *m, void *p)
1335 __releases(cd->hash_lock) 1330 __releases(cd->hash_lock)
1336{ 1331{
1337 struct cache_detail *cd = ((struct handle*)m->private)->cd; 1332 struct cache_detail *cd = m->private;
1338 read_unlock(&cd->hash_lock); 1333 read_unlock(&cd->hash_lock);
1339} 1334}
1335EXPORT_SYMBOL_GPL(cache_seq_stop);
1340 1336
1341static int c_show(struct seq_file *m, void *p) 1337static int c_show(struct seq_file *m, void *p)
1342{ 1338{
1343 struct cache_head *cp = p; 1339 struct cache_head *cp = p;
1344 struct cache_detail *cd = ((struct handle*)m->private)->cd; 1340 struct cache_detail *cd = m->private;
1345 1341
1346 if (p == SEQ_START_TOKEN) 1342 if (p == SEQ_START_TOKEN)
1347 return cd->cache_show(m, cd, NULL); 1343 return cd->cache_show(m, cd, NULL);
@@ -1364,33 +1360,36 @@ static int c_show(struct seq_file *m, void *p)
1364} 1360}
1365 1361
1366static const struct seq_operations cache_content_op = { 1362static const struct seq_operations cache_content_op = {
1367 .start = c_start, 1363 .start = cache_seq_start,
1368 .next = c_next, 1364 .next = cache_seq_next,
1369 .stop = c_stop, 1365 .stop = cache_seq_stop,
1370 .show = c_show, 1366 .show = c_show,
1371}; 1367};
1372 1368
1373static int content_open(struct inode *inode, struct file *file, 1369static int content_open(struct inode *inode, struct file *file,
1374 struct cache_detail *cd) 1370 struct cache_detail *cd)
1375{ 1371{
1376 struct handle *han; 1372 struct seq_file *seq;
1373 int err;
1377 1374
1378 if (!cd || !try_module_get(cd->owner)) 1375 if (!cd || !try_module_get(cd->owner))
1379 return -EACCES; 1376 return -EACCES;
1380 han = __seq_open_private(file, &cache_content_op, sizeof(*han)); 1377
1381 if (han == NULL) { 1378 err = seq_open(file, &cache_content_op);
1379 if (err) {
1382 module_put(cd->owner); 1380 module_put(cd->owner);
1383 return -ENOMEM; 1381 return err;
1384 } 1382 }
1385 1383
1386 han->cd = cd; 1384 seq = file->private_data;
1385 seq->private = cd;
1387 return 0; 1386 return 0;
1388} 1387}
1389 1388
1390static int content_release(struct inode *inode, struct file *file, 1389static int content_release(struct inode *inode, struct file *file,
1391 struct cache_detail *cd) 1390 struct cache_detail *cd)
1392{ 1391{
1393 int ret = seq_release_private(inode, file); 1392 int ret = seq_release(inode, file);
1394 module_put(cd->owner); 1393 module_put(cd->owner);
1395 return ret; 1394 return ret;
1396} 1395}
@@ -1665,17 +1664,21 @@ EXPORT_SYMBOL_GPL(cache_unregister_net);
1665struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) 1664struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net)
1666{ 1665{
1667 struct cache_detail *cd; 1666 struct cache_detail *cd;
1667 int i;
1668 1668
1669 cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); 1669 cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL);
1670 if (cd == NULL) 1670 if (cd == NULL)
1671 return ERR_PTR(-ENOMEM); 1671 return ERR_PTR(-ENOMEM);
1672 1672
1673 cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), 1673 cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head),
1674 GFP_KERNEL); 1674 GFP_KERNEL);
1675 if (cd->hash_table == NULL) { 1675 if (cd->hash_table == NULL) {
1676 kfree(cd); 1676 kfree(cd);
1677 return ERR_PTR(-ENOMEM); 1677 return ERR_PTR(-ENOMEM);
1678 } 1678 }
1679
1680 for (i = 0; i < cd->hash_size; i++)
1681 INIT_HLIST_HEAD(&cd->hash_table[i]);
1679 cd->net = net; 1682 cd->net = net;
1680 return cd; 1683 return cd;
1681} 1684}
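
All of the cache.c changes above are one mechanical conversion: the hand-rolled singly linked hash chains, walked with a struct cache_head **hp so the previous link could be patched on removal, become standard hlists, where every node carries its own back-link and hlist_del_init() is a purely local operation. The cleanup loop in cache_clean(), condensed to its new shape:

    /* Condensed from cache_clean(): drop the first expired entry in a
     * bucket.  The _safe variant permits deletion mid-walk. */
    static void reap_one(struct cache_detail *cd, struct hlist_head *head)
    {
            struct cache_head *ch;
            struct hlist_node *n;

            hlist_for_each_entry_safe(ch, n, head, cache_list) {
                    if (!cache_is_expired(cd, ch))
                            continue;
                    hlist_del_init(&ch->cache_list);
                    cd->entries--;
                    break;
            }
    }
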
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5a16d8d8c831..a8f579df14d8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -34,36 +34,19 @@
34 34
35static void svc_unregister(const struct svc_serv *serv, struct net *net); 35static void svc_unregister(const struct svc_serv *serv, struct net *net);
36 36
37#define svc_serv_is_pooled(serv) ((serv)->sv_function) 37#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function)
38 38
39/*
40 * Mode for mapping cpus to pools.
41 */
42enum {
43 SVC_POOL_AUTO = -1, /* choose one of the others */
44 SVC_POOL_GLOBAL, /* no mapping, just a single global pool
45 * (legacy & UP mode) */
46 SVC_POOL_PERCPU, /* one pool per cpu */
47 SVC_POOL_PERNODE /* one pool per numa node */
48};
49#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL 39#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
50 40
51/* 41/*
52 * Structure for mapping cpus to pools and vice versa. 42 * Structure for mapping cpus to pools and vice versa.
53 * Setup once during sunrpc initialisation. 43 * Setup once during sunrpc initialisation.
54 */ 44 */
55static struct svc_pool_map { 45struct svc_pool_map svc_pool_map = {
56 int count; /* How many svc_servs use us */
57 int mode; /* Note: int not enum to avoid
58 * warnings about "enumeration value
59 * not handled in switch" */
60 unsigned int npools;
61 unsigned int *pool_to; /* maps pool id to cpu or node */
62 unsigned int *to_pool; /* maps cpu or node to pool id */
63} svc_pool_map = {
64 .count = 0,
65 .mode = SVC_POOL_DEFAULT 46 .mode = SVC_POOL_DEFAULT
66}; 47};
48EXPORT_SYMBOL_GPL(svc_pool_map);
49
67static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ 50static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
68 51
69static int 52static int
@@ -236,7 +219,7 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
236 * vice versa). Initialise the map if we're the first user. 219 * vice versa). Initialise the map if we're the first user.
237 * Returns the number of pools. 220 * Returns the number of pools.
238 */ 221 */
239static unsigned int 222unsigned int
240svc_pool_map_get(void) 223svc_pool_map_get(void)
241{ 224{
242 struct svc_pool_map *m = &svc_pool_map; 225 struct svc_pool_map *m = &svc_pool_map;
@@ -271,7 +254,7 @@ svc_pool_map_get(void)
271 mutex_unlock(&svc_pool_map_mutex); 254 mutex_unlock(&svc_pool_map_mutex);
272 return m->npools; 255 return m->npools;
273} 256}
274 257EXPORT_SYMBOL_GPL(svc_pool_map_get);
275 258
276/* 259/*
277 * Drop a reference to the global map of cpus to pools. 260 * Drop a reference to the global map of cpus to pools.
@@ -280,7 +263,7 @@ svc_pool_map_get(void)
280 * mode using the pool_mode module option without 263 * mode using the pool_mode module option without
281 * rebooting or re-loading sunrpc.ko. 264 * rebooting or re-loading sunrpc.ko.
282 */ 265 */
283static void 266void
284svc_pool_map_put(void) 267svc_pool_map_put(void)
285{ 268{
286 struct svc_pool_map *m = &svc_pool_map; 269 struct svc_pool_map *m = &svc_pool_map;
@@ -297,7 +280,7 @@ svc_pool_map_put(void)
297 280
298 mutex_unlock(&svc_pool_map_mutex); 281 mutex_unlock(&svc_pool_map_mutex);
299} 282}
300 283EXPORT_SYMBOL_GPL(svc_pool_map_put);
301 284
302static int svc_pool_map_get_node(unsigned int pidx) 285static int svc_pool_map_get_node(unsigned int pidx)
303{ 286{
@@ -423,7 +406,7 @@ EXPORT_SYMBOL_GPL(svc_bind);
423 */ 406 */
424static struct svc_serv * 407static struct svc_serv *
425__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 408__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
426 void (*shutdown)(struct svc_serv *serv, struct net *net)) 409 struct svc_serv_ops *ops)
427{ 410{
428 struct svc_serv *serv; 411 struct svc_serv *serv;
429 unsigned int vers; 412 unsigned int vers;
@@ -440,7 +423,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
440 bufsize = RPCSVC_MAXPAYLOAD; 423 bufsize = RPCSVC_MAXPAYLOAD;
441 serv->sv_max_payload = bufsize? bufsize : 4096; 424 serv->sv_max_payload = bufsize? bufsize : 4096;
442 serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); 425 serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
443 serv->sv_shutdown = shutdown; 426 serv->sv_ops = ops;
444 xdrsize = 0; 427 xdrsize = 0;
445 while (prog) { 428 while (prog) {
446 prog->pg_lovers = prog->pg_nvers-1; 429 prog->pg_lovers = prog->pg_nvers-1;
@@ -486,26 +469,22 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
486 469
487struct svc_serv * 470struct svc_serv *
488svc_create(struct svc_program *prog, unsigned int bufsize, 471svc_create(struct svc_program *prog, unsigned int bufsize,
489 void (*shutdown)(struct svc_serv *serv, struct net *net)) 472 struct svc_serv_ops *ops)
490{ 473{
491 return __svc_create(prog, bufsize, /*npools*/1, shutdown); 474 return __svc_create(prog, bufsize, /*npools*/1, ops);
492} 475}
493EXPORT_SYMBOL_GPL(svc_create); 476EXPORT_SYMBOL_GPL(svc_create);
494 477
495struct svc_serv * 478struct svc_serv *
496svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 479svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
497 void (*shutdown)(struct svc_serv *serv, struct net *net), 480 struct svc_serv_ops *ops)
498 svc_thread_fn func, struct module *mod)
499{ 481{
500 struct svc_serv *serv; 482 struct svc_serv *serv;
501 unsigned int npools = svc_pool_map_get(); 483 unsigned int npools = svc_pool_map_get();
502 484
503 serv = __svc_create(prog, bufsize, npools, shutdown); 485 serv = __svc_create(prog, bufsize, npools, ops);
504 if (!serv) 486 if (!serv)
505 goto out_err; 487 goto out_err;
506
507 serv->sv_function = func;
508 serv->sv_module = mod;
509 return serv; 488 return serv;
510out_err: 489out_err:
511 svc_pool_map_put(); 490 svc_pool_map_put();
@@ -517,8 +496,8 @@ void svc_shutdown_net(struct svc_serv *serv, struct net *net)
517{ 496{
518 svc_close_net(serv, net); 497 svc_close_net(serv, net);
519 498
520 if (serv->sv_shutdown) 499 if (serv->sv_ops->svo_shutdown)
521 serv->sv_shutdown(serv, net); 500 serv->sv_ops->svo_shutdown(serv, net);
522} 501}
523EXPORT_SYMBOL_GPL(svc_shutdown_net); 502EXPORT_SYMBOL_GPL(svc_shutdown_net);
524 503
@@ -604,40 +583,52 @@ svc_release_buffer(struct svc_rqst *rqstp)
604} 583}
605 584
606struct svc_rqst * 585struct svc_rqst *
607svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) 586svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
608{ 587{
609 struct svc_rqst *rqstp; 588 struct svc_rqst *rqstp;
610 589
611 rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); 590 rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node);
612 if (!rqstp) 591 if (!rqstp)
613 goto out_enomem; 592 return rqstp;
614 593
615 serv->sv_nrthreads++;
616 __set_bit(RQ_BUSY, &rqstp->rq_flags); 594 __set_bit(RQ_BUSY, &rqstp->rq_flags);
617 spin_lock_init(&rqstp->rq_lock); 595 spin_lock_init(&rqstp->rq_lock);
618 rqstp->rq_server = serv; 596 rqstp->rq_server = serv;
619 rqstp->rq_pool = pool; 597 rqstp->rq_pool = pool;
620 spin_lock_bh(&pool->sp_lock);
621 pool->sp_nrthreads++;
622 list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
623 spin_unlock_bh(&pool->sp_lock);
624 598
625 rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 599 rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
626 if (!rqstp->rq_argp) 600 if (!rqstp->rq_argp)
627 goto out_thread; 601 goto out_enomem;
628 602
629 rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 603 rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
630 if (!rqstp->rq_resp) 604 if (!rqstp->rq_resp)
631 goto out_thread; 605 goto out_enomem;
632 606
633 if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) 607 if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
634 goto out_thread; 608 goto out_enomem;
635 609
636 return rqstp; 610 return rqstp;
637out_thread:
638 svc_exit_thread(rqstp);
639out_enomem: 611out_enomem:
640 return ERR_PTR(-ENOMEM); 612 svc_rqst_free(rqstp);
613 return NULL;
614}
615EXPORT_SYMBOL_GPL(svc_rqst_alloc);
616
617struct svc_rqst *
618svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
619{
620 struct svc_rqst *rqstp;
621
622 rqstp = svc_rqst_alloc(serv, pool, node);
623 if (!rqstp)
624 return ERR_PTR(-ENOMEM);
625
626 serv->sv_nrthreads++;
627 spin_lock_bh(&pool->sp_lock);
628 pool->sp_nrthreads++;
629 list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
630 spin_unlock_bh(&pool->sp_lock);
631 return rqstp;
641} 632}
642EXPORT_SYMBOL_GPL(svc_prepare_thread); 633EXPORT_SYMBOL_GPL(svc_prepare_thread);
643 634
@@ -739,12 +730,12 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
739 break; 730 break;
740 } 731 }
741 732
742 __module_get(serv->sv_module); 733 __module_get(serv->sv_ops->svo_module);
743 task = kthread_create_on_node(serv->sv_function, rqstp, 734 task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
744 node, "%s", serv->sv_name); 735 node, "%s", serv->sv_name);
745 if (IS_ERR(task)) { 736 if (IS_ERR(task)) {
746 error = PTR_ERR(task); 737 error = PTR_ERR(task);
747 module_put(serv->sv_module); 738 module_put(serv->sv_ops->svo_module);
748 svc_exit_thread(rqstp); 739 svc_exit_thread(rqstp);
749 break; 740 break;
750 } 741 }
@@ -772,15 +763,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
  * mutex" for the service.
  */
 void
-svc_exit_thread(struct svc_rqst *rqstp)
+svc_rqst_free(struct svc_rqst *rqstp)
 {
-	struct svc_serv *serv = rqstp->rq_server;
-	struct svc_pool *pool = rqstp->rq_pool;
-
 	svc_release_buffer(rqstp);
 	kfree(rqstp->rq_resp);
 	kfree(rqstp->rq_argp);
 	kfree(rqstp->rq_auth_data);
+	kfree_rcu(rqstp, rq_rcu_head);
+}
+EXPORT_SYMBOL_GPL(svc_rqst_free);
+
+void
+svc_exit_thread(struct svc_rqst *rqstp)
+{
+	struct svc_serv *serv = rqstp->rq_server;
+	struct svc_pool *pool = rqstp->rq_pool;
 
 	spin_lock_bh(&pool->sp_lock);
 	pool->sp_nrthreads--;
@@ -788,7 +785,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
 	list_del_rcu(&rqstp->rq_all);
 	spin_unlock_bh(&pool->sp_lock);
 
-	kfree_rcu(rqstp, rq_rcu_head);
+	svc_rqst_free(rqstp);
 
 	/* Release the server */
 	if (serv)
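
Taken together, the svc.c hunks above split request lifecycle management in two: svc_rqst_alloc()/svc_rqst_free() handle only memory, while svc_prepare_thread()/svc_exit_thread() additionally maintain the serv/pool thread accounting. A minimal sketch of the intended pairing, assuming only what the hunks show; spawn_pool_thread() and reap_pool_thread() are hypothetical wrappers and error handling is elided:

/* Sketch only: how the split helpers are meant to pair up.
 * spawn_pool_thread()/reap_pool_thread() are hypothetical.
 */
static struct svc_rqst *spawn_pool_thread(struct svc_serv *serv,
					  struct svc_pool *pool, int node)
{
	/* allocates the rqst and bumps sv_nrthreads/sp_nrthreads */
	struct svc_rqst *rqstp = svc_prepare_thread(serv, pool, node);

	if (IS_ERR(rqstp))
		return rqstp;		/* only ERR_PTR(-ENOMEM) today */
	/* ... kthread_create_on_node(serv->sv_ops->svo_function, ...) ... */
	return rqstp;
}

static void reap_pool_thread(struct svc_rqst *rqstp)
{
	/* drops the accounting, then frees via svc_rqst_free() */
	svc_exit_thread(rqstp);
}

A caller that wants a svc_rqst without registering a pool thread can instead pair svc_rqst_alloc() directly with svc_rqst_free() and skip the accounting entirely.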
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 163ac45c3639..a6cbb2104667 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -24,7 +24,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
 static void svc_delete_xprt(struct svc_xprt *xprt);
-static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -225,12 +224,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
 	}
 
 	/* As soon as we clear busy, the xprt could be closed and
-	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
+	 * 'put', so we need a reference to call svc_enqueue_xprt with:
 	 */
 	svc_xprt_get(xprt);
 	smp_mb__before_atomic();
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_do_enqueue(xprt);
+	xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
 	svc_xprt_put(xprt);
 }
 
@@ -320,7 +319,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
 		return false;
 }
 
-static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 {
 	struct svc_pool *pool;
 	struct svc_rqst *rqstp = NULL;
@@ -402,6 +401,7 @@ redo_search:
 out:
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
 }
+EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
 
 /*
  * Queue up a transport with data pending. If there are idle nfsd
@@ -412,7 +412,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
 	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
 		return;
-	svc_xprt_do_enqueue(xprt);
+	xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
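
With svc_xprt_received() and svc_xprt_enqueue() now calling through sv_ops->svo_enqueue_xprt, and svc_xprt_do_enqueue() exported, each service picks its enqueue policy in its ops table. A hedged sketch using only the svo_* fields referenced in the hunks above; my_threadfn() is a hypothetical thread entry point, not part of the patch:

/* Hypothetical ops table for illustration. */
static int my_threadfn(void *data);		/* per-thread service loop */

static struct svc_serv_ops my_sv_ops = {
	.svo_function		= my_threadfn,		/* kthread entry */
	.svo_enqueue_xprt	= svc_xprt_do_enqueue,	/* stock policy */
	.svo_module		= THIS_MODULE,		/* pinned per thread */
};

A transport-specific server could point svo_enqueue_xprt at its own routine instead and still reuse the generic svc_xprt_received() path unchanged.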
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index d25cd430f9ff..1dfae8317065 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -136,6 +136,79 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
 	return dma_addr;
 }
 
+/* Returns the address of the first read chunk, or NULL if no read
+ * chunk is present.
+ */
+struct rpcrdma_read_chunk *
+svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+{
+	struct rpcrdma_read_chunk *ch =
+		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+
+	if (ch->rc_discrim == xdr_zero)
+		return NULL;
+	return ch;
+}
+
+/* Returns the address of the first write array element, or NULL
+ * if no write array list is present.
+ */
+static struct rpcrdma_write_array *
+svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
+{
+	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
+	    rmsgp->rm_body.rm_chunks[1] == xdr_zero)
+		return NULL;
+	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
+}
+
+/* Returns the address of the first reply array element, or NULL if
+ * no reply array is present.
+ */
+static struct rpcrdma_write_array *
+svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
+{
+	struct rpcrdma_read_chunk *rch;
+	struct rpcrdma_write_array *wr_ary;
+	struct rpcrdma_write_array *rp_ary;
+
+	/* XXX: Need to fix when reply chunk may occur with read list
+	 * and/or write list.
+	 */
+	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
+	    rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+		return NULL;
+
+	rch = svc_rdma_get_read_chunk(rmsgp);
+	if (rch) {
+		while (rch->rc_discrim != xdr_zero)
+			rch++;
+
+		/* The reply chunk follows an empty write array located
+		 * at 'rc_position' here. The reply array is at rc_target.
+		 */
+		rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
+		goto found_it;
+	}
+
+	wr_ary = svc_rdma_get_write_array(rmsgp);
+	if (wr_ary) {
+		int chunk = be32_to_cpu(wr_ary->wc_nchunks);
+
+		rp_ary = (struct rpcrdma_write_array *)
+			&wr_ary->wc_array[chunk].wc_target.rs_length;
+		goto found_it;
+	}
+
+	/* No read list, no write list */
+	rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];
+
+ found_it:
+	if (rp_ary->wc_discrim == xdr_zero)
+		return NULL;
+	return rp_ary;
+}
+
 /* Assumptions:
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
@@ -384,6 +457,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 			  int byte_count)
 {
 	struct ib_send_wr send_wr;
+	u32 xdr_off;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -418,8 +492,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	ctxt->direction = DMA_TO_DEVICE;
 
 	/* Map the payload indicated by 'byte_count' */
+	xdr_off = 0;
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
-		int xdr_off = 0;
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
 		ctxt->sge[sge_no].addr =
@@ -457,6 +531,13 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	}
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
+	/* The loop above bumps sc_dma_used for each sge. The
+	 * xdr_buf.tail gets a separate sge, but resides in the
+	 * same page as xdr_buf.head. Don't count it twice.
+	 */
+	if (sge_no > ctxt->count)
+		atomic_dec(&rdma->sc_dma_used);
+
 	if (sge_no > rdma->sc_max_sge) {
 		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
 		goto err;
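
Two details in the svc_rdma_sendto.c hunks are easy to miss: xdr_off is hoisted out of the mapping loop so the offset accumulates across sges rather than resetting each pass, and sc_dma_used is decremented once when the tail shares a page with the head so that page is not counted twice. A user-space analogy of the xdr_off fix; map_payload() and its arguments are purely illustrative, not kernel code:

#include <stddef.h>

/* Before the fix, the equivalent of xdr_off was declared inside
 * the loop, so every sge mapped from offset 0 of the XDR buffer.
 * Hoisted, the offset advances past each mapped sge.
 */
static size_t map_payload(const size_t *sge_len, int count)
{
	size_t xdr_off = 0;	/* hoisted: survives across iterations */
	int sge_no;

	for (sge_no = 0; sge_no < count; sge_no++) {
		/* map sge_len[sge_no] bytes starting at xdr_off */
		xdr_off += sge_len[sge_no];
	}
	return xdr_off;		/* total payload bytes mapped */
}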
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6b36279e4288..21e40365042c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_name = "rdma",
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_rdma_ops,
-	.xcl_max_payload = RPCRDMA_MAXPAYLOAD,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
 	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
@@ -659,6 +659,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
 		if (xprt) {
 			set_bit(XPT_CLOSE, &xprt->xpt_flags);
 			svc_xprt_enqueue(xprt);
+			svc_xprt_put(xprt);
 		}
 		break;
 	default:
@@ -1201,40 +1202,6 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp)
 	return 1;
 }
 
-/*
- * Attempt to register the kvec representing the RPC memory with the
- * device.
- *
- * Returns:
- *  NULL    : The device does not support fastreg or there were no more
- *            fastreg mr.
- *  frmr    : The kvec register request was successfully posted.
- *  <0      : An error was encountered attempting to register the kvec.
- */
-int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
-		     struct svc_rdma_fastreg_mr *frmr)
-{
-	struct ib_send_wr fastreg_wr;
-	u8 key;
-
-	/* Bump the key */
-	key = (u8)(frmr->mr->lkey & 0x000000FF);
-	ib_update_fast_reg_key(frmr->mr, ++key);
-
-	/* Prepare FASTREG WR */
-	memset(&fastreg_wr, 0, sizeof fastreg_wr);
-	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-	fastreg_wr.send_flags = IB_SEND_SIGNALED;
-	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
-	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
-	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
-	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-	fastreg_wr.wr.fast_reg.length = frmr->map_len;
-	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
-	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
-	return svc_rdma_send(xprt, &fastreg_wr);
-}
-
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
 	struct ib_send_wr *bad_wr, *n_wr;
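
Beyond dropping the unused svc_rdma_fastreg() and sizing xcl_max_payload from RPCSVC_MAXPAYLOAD_RDMA, the transport hunk adds the svc_xprt_put() that balances the reference held while the CM handler marks a transport XPT_CLOSE, mirroring the get/enqueue/put discipline of svc_xprt_received() above. A condensed sketch; handle_close_event() is a hypothetical stand-in, assuming the caller holds a reference as the added put implies:

/* Sketch of the balanced reference pattern the one-line
 * svc_xprt_put() restores on the close path.
 */
static void handle_close_event(struct svc_xprt *xprt)
{
	if (!xprt)
		return;
	set_bit(XPT_CLOSE, &xprt->xpt_flags);
	svc_xprt_enqueue(xprt);	/* a server thread performs the close */
	svc_xprt_put(xprt);	/* drop the reference held for the event */
}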
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f49dd8b38122..e718d0959af3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,7 +51,6 @@
 #include <linux/sunrpc/clnt.h> 		/* rpc_xprt */
 #include <linux/sunrpc/rpc_rdma.h> 	/* RPC/RDMA protocol */
 #include <linux/sunrpc/xprtrdma.h> 	/* xprt parameters */
-#include <linux/sunrpc/svc.h> 		/* RPCSVC_MAXPAYLOAD */
 
 #define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
 #define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */