-rw-r--r--  fs/lockd/clntlock.c | 2
-rw-r--r--  fs/lockd/clntproc.c | 4
-rw-r--r--  fs/lockd/host.c | 10
-rw-r--r--  fs/lockd/mon.c | 1
-rw-r--r--  fs/nfs/client.c | 16
-rw-r--r--  fs/nfs/delegation.c | 12
-rw-r--r--  fs/nfs/delegation.h | 1
-rw-r--r--  fs/nfs/dir.c | 7
-rw-r--r--  fs/nfs/direct.c | 11
-rw-r--r--  fs/nfs/file.c | 31
-rw-r--r--  fs/nfs/filelayout/filelayout.c | 6
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.c | 14
-rw-r--r--  fs/nfs/inode.c | 13
-rw-r--r--  fs/nfs/internal.h | 8
-rw-r--r--  fs/nfs/mount_clnt.c | 2
-rw-r--r--  fs/nfs/nfs2xdr.c | 58
-rw-r--r--  fs/nfs/nfs3client.c | 1
-rw-r--r--  fs/nfs/nfs3xdr.c | 142
-rw-r--r--  fs/nfs/nfs4_fs.h | 1
-rw-r--r--  fs/nfs/nfs4client.c | 6
-rw-r--r--  fs/nfs/nfs4file.c | 4
-rw-r--r--  fs/nfs/nfs4idmap.c | 27
-rw-r--r--  fs/nfs/nfs4proc.c | 159
-rw-r--r--  fs/nfs/nfs4state.c | 7
-rw-r--r--  fs/nfs/pagelist.c | 123
-rw-r--r--  fs/nfs/pnfs.c | 4
-rw-r--r--  fs/nfs/pnfs.h | 4
-rw-r--r--  fs/nfs/read.c | 6
-rw-r--r--  fs/nfs/super.c | 32
-rw-r--r--  fs/nfs/symlink.c | 7
-rw-r--r--  fs/nfs/write.c | 70
-rw-r--r--  fs/nfsd/nfs4callback.c | 5
-rw-r--r--  include/linux/lockd/bind.h | 1
-rw-r--r--  include/linux/lockd/lockd.h | 4
-rw-r--r--  include/linux/nfs_fs.h | 1
-rw-r--r--  include/linux/nfs_fs_sb.h | 13
-rw-r--r--  include/linux/nfs_page.h | 12
-rw-r--r--  include/linux/sunrpc/clnt.h | 4
-rw-r--r--  include/linux/sunrpc/sched.h | 20
-rw-r--r--  include/linux/sunrpc/xprt.h | 6
-rw-r--r--  include/trace/events/rpcrdma.h | 27
-rw-r--r--  include/trace/events/sunrpc.h | 8
-rw-r--r--  include/uapi/linux/nfs_mount.h | 9
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 71
-rw-r--r--  net/sunrpc/auth_unix.c | 9
-rw-r--r--  net/sunrpc/clnt.c | 132
-rw-r--r--  net/sunrpc/debugfs.c | 2
-rw-r--r--  net/sunrpc/rpcb_clnt.c | 12
-rw-r--r--  net/sunrpc/sched.c | 158
-rw-r--r--  net/sunrpc/socklib.c | 2
-rw-r--r--  net/sunrpc/xprt.c | 154
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c | 120
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 63
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 115
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 105
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 338
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 121
-rw-r--r--  net/sunrpc/xprtsock.c | 9
59 files changed, 1366 insertions, 946 deletions
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index c2a128678e6e..70f520b41a19 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -63,7 +63,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 	host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
 				   nlm_init->protocol, nlm_version,
 				   nlm_init->hostname, nlm_init->noresvport,
-				   nlm_init->net);
+				   nlm_init->net, nlm_init->cred);
 	if (host == NULL)
 		goto out_nohost;
 	if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e8a004097d18..d9c32d1a20c0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -715,7 +715,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 	struct nlm_rqst	*req = data;
 	u32 status = ntohl(req->a_res.status);
 
-	if (RPC_ASSASSINATED(task))
+	if (RPC_SIGNALLED(task))
 		goto die;
 
 	if (task->tk_status < 0) {
@@ -783,7 +783,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 	struct nlm_rqst	*req = data;
 	u32 status = ntohl(req->a_res.status);
 
-	if (RPC_ASSASSINATED(task))
+	if (RPC_SIGNALLED(task))
 		goto die;
 
 	if (task->tk_status < 0) {
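
Both hunks above are the same mechanical substitution: RPC_ASSASSINATED() is gone, and the callbacks instead test RPC_SIGNALLED(), i.e. whether the task caught a fatal signal. A minimal sketch of the pattern, with hypothetical names (my_rqst, my_unlock_callback):

/*
 * Sketch only, not part of the patch: an async RPC callback that
 * refuses to retry once the task has been signalled.
 */
struct my_rqst { int a_retries; };

static void my_unlock_callback(struct rpc_task *task, void *data)
{
	struct my_rqst *req = data;

	if (RPC_SIGNALLED(task))	/* fatal signal received: give up */
		goto die;
	if (task->tk_status < 0)
		req->a_retries++;	/* hypothetical retry bookkeeping */
die:
	return;
}
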
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f0b5c987d6ae..7d46fafdbbe5 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -60,6 +60,7 @@ struct nlm_lookup_host_info {
 	const size_t		hostname_len;	/* it's length */
 	const int		noresvport;	/* use non-priv port */
 	struct net		*net;		/* network namespace to bind */
+	const struct cred	*cred;
 };
 
 /*
@@ -162,6 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
 	host->h_nsmhandle  = nsm;
 	host->h_addrbuf    = nsm->sm_addrbuf;
 	host->net	   = ni->net;
+	host->h_cred	   = get_cred(ni->cred),
 	strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
 
 out:
@@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
 	clnt = host->h_rpcclnt;
 	if (clnt != NULL)
 		rpc_shutdown_client(clnt);
+	put_cred(host->h_cred);
 	kfree(host);
 
 	ln->nrhosts--;
@@ -202,6 +205,8 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
  * @version: NLM protocol version
  * @hostname: '\0'-terminated hostname of server
  * @noresvport: 1 if non-privileged port should be used
+ * @net: pointer to net namespace
+ * @cred: pointer to cred
  *
  * Returns an nlm_host structure that matches the passed-in
  * [server address, transport protocol, NLM version, server hostname].
@@ -214,7 +219,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 				     const u32 version,
 				     const char *hostname,
 				     int noresvport,
-				     struct net *net)
+				     struct net *net,
+				     const struct cred *cred)
 {
 	struct nlm_lookup_host_info ni = {
 		.server		= 0,
@@ -226,6 +232,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.noresvport	= noresvport,
 		.net		= net,
+		.cred		= cred,
 	};
 	struct hlist_head *chain;
 	struct nlm_host	*host;
@@ -458,6 +465,7 @@ nlm_bind_host(struct nlm_host *host)
 		.authflavor	= RPC_AUTH_UNIX,
 		.flags		= (RPC_CLNT_CREATE_NOPING |
 				   RPC_CLNT_CREATE_AUTOBIND),
+		.cred		= host->h_cred,
 	};
 
 	/*
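
All of the host.c hunks implement one ownership rule for the new h_cred field: nlmclnt_lookup_host() receives the cred, nlm_alloc_host() pins it with get_cred(), nlm_bind_host() hands it to rpc_create(), and nlm_destroy_host_locked() releases it with put_cred(). The refcounting pattern in isolation, as a sketch against a hypothetical struct my_host:

#include <linux/cred.h>

struct my_host {
	const struct cred *h_cred;
};

/* Take a long-lived reference for as long as the host exists... */
static void my_host_set_cred(struct my_host *host, const struct cred *cred)
{
	host->h_cred = get_cred(cred);
}

/* ...and drop it exactly once, when the host is destroyed. */
static void my_host_free(struct my_host *host)
{
	put_cred(host->h_cred);
}
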
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 654594ef4f94..1eabd91870e6 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -82,6 +82,7 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
 		.version	= NSM_VERSION,
 		.authflavor	= RPC_AUTH_NULL,
 		.flags		= RPC_CLNT_CREATE_NOPING,
+		.cred		= current_cred(),
 	};
 
 	return rpc_create(&args);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 90d71fda65ce..da74c4c4a244 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -284,6 +284,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	struct nfs_client *clp;
 	const struct sockaddr *sap = data->addr;
 	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
+	int error;
 
 again:
 	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
@@ -296,9 +297,11 @@ again:
 		if (clp->cl_cons_state > NFS_CS_READY) {
 			refcount_inc(&clp->cl_count);
 			spin_unlock(&nn->nfs_client_lock);
-			nfs_wait_client_init_complete(clp);
+			error = nfs_wait_client_init_complete(clp);
 			nfs_put_client(clp);
 			spin_lock(&nn->nfs_client_lock);
+			if (error < 0)
+				return ERR_PTR(error);
 			goto again;
 		}
 
304 307
@@ -407,6 +410,8 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 	clp = nfs_match_client(cl_init);
 	if (clp) {
 		spin_unlock(&nn->nfs_client_lock);
+		if (IS_ERR(clp))
+			return clp;
 		if (new)
 			new->rpc_ops->free_client(new);
 		return nfs_found_client(cl_init, clp);
@@ -500,6 +505,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 		.program	= &nfs_program,
 		.version	= clp->rpc_ops->version,
 		.authflavor	= flavor,
+		.cred		= cl_init->cred,
 	};
 
 	if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@@ -598,6 +604,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
 			sizeof(server->client->cl_timeout_default));
 	server->client->cl_timeout = &server->client->cl_timeout_default;
 	server->client->cl_softrtry = 0;
+	if (server->flags & NFS_MOUNT_SOFTERR)
+		server->client->cl_softerr = 1;
 	if (server->flags & NFS_MOUNT_SOFT)
 		server->client->cl_softrtry = 1;
 
@@ -652,6 +660,7 @@ static int nfs_init_server(struct nfs_server *server,
 		.proto = data->nfs_server.protocol,
 		.net = data->net,
 		.timeparms = &timeparms,
+		.cred = server->cred,
 	};
 	struct nfs_client *clp;
 	int error;
@@ -920,6 +929,7 @@ void nfs_free_server(struct nfs_server *server)
 	ida_destroy(&server->lockowner_id);
 	ida_destroy(&server->openowner_id);
 	nfs_free_iostats(server->io_stats);
+	put_cred(server->cred);
 	kfree(server);
 	nfs_release_automount_timer();
 }
@@ -940,6 +950,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
+	server->cred = get_cred(current_cred());
+
 	error = -ENOMEM;
 	fattr = nfs_alloc_fattr();
 	if (fattr == NULL)
@@ -1006,6 +1018,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
+	server->cred = get_cred(source->cred);
+
 	error = -ENOMEM;
 	fattr_fsinfo = nfs_alloc_fattr();
 	if (fattr_fsinfo == NULL)
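
nfs_match_client() now propagates a failed (interrupted) wait as an ERR_PTR instead of silently looping, so nfs_get_client() has to discriminate three outcomes from a single pointer. The convention, sketched with hypothetical helpers (my_match, my_create):

/* Sketch of the NULL / ERR_PTR / valid-pointer convention used above. */
static struct nfs_client *my_find_or_create(struct nfs_client_initdata *init)
{
	struct nfs_client *clp = my_match(init);	/* hypothetical */

	if (IS_ERR(clp))
		return clp;		/* e.g. wait interrupted by a signal */
	if (clp)
		return clp;		/* matched an existing client */
	return my_create(init);		/* no match: make a new one */
}
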
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 2f6b447cdd82..8b78274e3e56 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -1034,6 +1034,18 @@ void nfs_mark_test_expired_all_delegations(struct nfs_client *clp)
 }
 
 /**
+ * nfs_test_expired_all_delegations - test all delegations for a client
+ * @clp: nfs_client to process
+ *
+ * Helper for handling "recallable state revoked" status from server.
+ */
+void nfs_test_expired_all_delegations(struct nfs_client *clp)
+{
+	nfs_mark_test_expired_all_delegations(clp);
+	nfs4_schedule_state_manager(clp);
+}
+
+/**
  * nfs_reap_expired_delegations - reap expired delegations
  * @clp: nfs_client to process
  *
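
The new helper simply fuses the existing "mark everything for testing" step with a state-manager kick. A plausible call site (a sketch, not part of this hunk) is a v4.1 CB_SEQUENCE status-flag handler:

/* Sketch: reacting to the server reporting revoked recallable state. */
if (status_flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
	nfs_test_expired_all_delegations(clp);
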
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 35b4b02c1ae0..5799777df5ec 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -58,6 +58,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 void nfs_mark_test_expired_all_delegations(struct nfs_client *clp);
+void nfs_test_expired_all_delegations(struct nfs_client *clp);
 void nfs_reap_expired_delegations(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a71d0b42d160..47d445bec8c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -714,8 +714,9 @@ out:
  * We only need to convert from xdr once so future lookups are much simpler
  */
 static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
+int nfs_readdir_filler(void *data, struct page* page)
 {
+	nfs_readdir_descriptor_t *desc = data;
 	struct inode *inode = file_inode(desc->file);
 	int ret;
 
@@ -762,8 +763,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	return read_cache_page(desc->file->f_mapping,
-			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
+	return read_cache_page(desc->file->f_mapping, desc->page_index,
+			nfs_readdir_filler, desc);
 }
 
 /*
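
Both dir.c hunks track the filler_t signature: the filler now takes its private pointer as void * and casts it back itself, which lets get_cache_page() drop the (filler_t *) cast (a call through an incompatible function-pointer type). The shape of a conforming filler, with hypothetical names (my_desc, my_fill_page):

/* Sketch: a read_cache_page() filler with the exact filler_t signature. */
static int my_filler(void *data, struct page *page)
{
	struct my_desc *desc = data;	/* recover the typed argument */

	return my_fill_page(desc, page);	/* hypothetical */
}

static struct page *my_get_page(struct address_space *mapping, pgoff_t index,
				struct my_desc *desc)
{
	return read_cache_page(mapping, index, my_filler, desc);
}
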
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0fd811ac08b5..2436bd92bc00 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -492,7 +492,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+			req = nfs_create_request(dreq->ctx, pagevec[i],
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	}
 
 	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+		/* Bump the transmission count */
+		req->wb_nio++;
 		if (!nfs_pageio_add_request(&desc, req)) {
 			nfs_list_move_request(req, &failed);
 			spin_lock(&cinfo.inode->i_lock);
@@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
 		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+			/*
+			 * Despite the reboot, the write was successful,
+			 * so reset wb_nio.
+			 */
+			req->wb_nio = 0;
 			/* Note the rewrite will go through mds */
 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
 		} else
@@ -899,7 +906,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 		struct nfs_page *req;
 		unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-		req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+		req = nfs_create_request(dreq->ctx, pagevec[i],
 					 pgbase, req_len);
 		if (IS_ERR(req)) {
 			result = PTR_ERR(req);
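
These direct.c hunks maintain the new wb_nio field this series adds to struct nfs_page (see the include/linux/nfs_page.h entry in the diffstat): it counts how often a request has been transmitted, and is reset when a post-reboot commit proves the write itself already succeeded. Combined with the pg_maxretrans descriptor field set by the flexfiles hunks below, it bounds retries; a sketch of such a cutoff test, with a hypothetical helper name:

/* Sketch: give up on a request once it exceeds the descriptor's cap. */
static bool my_request_exhausted(const struct nfs_page *req,
				 const struct nfs_pageio_descriptor *desc)
{
	return desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans;
}
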
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4899b85f9b3c..144e183250c3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -147,7 +147,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
 		return 0;
 
 	/* Flush writes to the server and return any errors */
-	return vfs_fsync(file, 0);
+	return nfs_wb_all(inode);
 }
 
 ssize_t
@@ -199,13 +199,6 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
  * Flush any dirty pages for this process, and check for write errors.
  * The return status from this call provides a reliable indication of
  * whether any write errors occurred for this process.
- *
- * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
- * disk, but it retrieves and clears ctx->error after synching, despite
- * the two being set at the same time in nfs_context_set_write_error().
- * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occurred, and hence cause it to
- * fall back to doing a synchronous write.
  */
 static int
 nfs_file_fsync_commit(struct file *file, int datasync)
@@ -220,11 +213,8 @@ nfs_file_fsync_commit(struct file *file, int datasync)
 	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
 	status = nfs_commit_inode(inode, FLUSH_SYNC);
-	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
-		ret = xchg(&ctx->error, 0);
-		if (ret)
-			goto out;
-	}
+	if (status == 0)
+		status = file_check_and_advance_wb_err(file);
 	if (status < 0) {
 		ret = status;
 		goto out;
@@ -245,13 +235,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	trace_nfs_fsync_enter(inode);
 
 	do {
-		struct nfs_open_context *ctx = nfs_file_open_context(file);
-		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-		if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
-			int ret2 = xchg(&ctx->error, 0);
-			if (ret2)
-				ret = ret2;
-		}
+		ret = file_write_and_wait_range(file, start, end);
 		if (ret != 0)
 			break;
 		ret = nfs_file_fsync_commit(file, datasync);
@@ -600,8 +584,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode)
 	struct nfs_open_context *ctx;
 
 	ctx = nfs_file_open_context(filp);
-	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
-	    nfs_ctx_key_to_expire(ctx, inode))
+	if (nfs_ctx_key_to_expire(ctx, inode))
 		return 1;
 	return 0;
 }
@@ -655,7 +638,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 
 	/* Return error values */
 	if (nfs_need_check_write(file, inode)) {
-		int err = vfs_fsync(file, 0);
+		int err = nfs_wb_all(inode);
 		if (err < 0)
 			result = err;
 	}
@@ -709,7 +692,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	 * Flush all pending writes before doing anything
 	 * with locks..
 	 */
-	vfs_fsync(filp, 0);
+	nfs_wb_all(inode);
 
 	l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
 	if (!IS_ERR(l_ctx)) {
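
Every file.c hunk serves the same conversion: the private NFS_CONTEXT_ERROR_WRITE / ctx->error reporting is dropped in favor of the VFS errseq_t machinery, where writeback errors are latched in the address_space and each struct file samples them via file_check_and_advance_wb_err(). A minimal fsync built on that API (my_fsync and my_commit are hypothetical; the two VFS helpers are real):

/* Sketch: flush, commit, then collect any latched writeback error. */
static int my_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int ret = file_write_and_wait_range(file, start, end);

	if (ret == 0)
		ret = my_commit(file_inode(file));	/* hypothetical */
	if (ret == 0)
		ret = file_check_and_advance_wb_err(file);
	return ret;
}
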
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 61f46facb39c..3cb073c50fa6 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -904,7 +904,7 @@ fl_pnfs_update_layout(struct inode *ino,
 	status = filelayout_check_deviceid(lo, fl, gfp_flags);
 	if (status) {
 		pnfs_put_lseg(lseg);
-		lseg = ERR_PTR(status);
+		lseg = NULL;
 	}
 out:
 	return lseg;
@@ -917,7 +917,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
-						      req->wb_context,
+						      nfs_req_openctx(req),
 						      0,
 						      NFS4_MAX_UINT64,
 						      IOMODE_READ,
@@ -944,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
-						      req->wb_context,
+						      nfs_req_openctx(req),
 						      0,
 						      NFS4_MAX_UINT64,
 						      IOMODE_RW,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6673d4ff5a2a..9920c52bd0cd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -28,6 +28,8 @@
 #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ)
 #define FF_LAYOUTRETURN_MAXERR 20
 
+static unsigned short io_maxretrans;
+
 static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
 		struct nfs_pgio_header *hdr);
 static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@@ -871,7 +873,7 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
 {
 	pnfs_put_lseg(pgio->pg_lseg);
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-					   req->wb_context,
+					   nfs_req_openctx(req),
 					   0,
 					   NFS4_MAX_UINT64,
 					   IOMODE_READ,
@@ -925,6 +927,7 @@ retry:
 	pgm = &pgio->pg_mirrors[0];
 	pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
 
+	pgio->pg_maxretrans = io_maxretrans;
 	return;
 out_nolseg:
 	if (pgio->pg_error < 0)
@@ -950,7 +953,7 @@ retry:
 	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-						   req->wb_context,
+						   nfs_req_openctx(req),
 						   0,
 						   NFS4_MAX_UINT64,
 						   IOMODE_RW,
@@ -992,6 +995,7 @@ retry:
 		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
 	}
 
+	pgio->pg_maxretrans = io_maxretrans;
 	return;
 
 out_mds:
@@ -1006,7 +1010,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
 {
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-						   req->wb_context,
+						   nfs_req_openctx(req),
 						   0,
 						   NFS4_MAX_UINT64,
 						   IOMODE_RW,
@@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");
 
 module_init(nfs4flexfilelayout_init);
 module_exit(nfs4flexfilelayout_exit);
+
+module_param(io_maxretrans, ushort, 0644);
+MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client "
+		"retries an I/O request before returning an error. ");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f61af8307dc8..3bc2550cfe4e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -885,10 +885,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
 	spin_lock(&inode->i_lock);
 	res = __nfs_find_lock_context(ctx);
 	if (res == NULL) {
-		list_add_tail_rcu(&new->list, &ctx->lock_context.list);
-		new->open_context = ctx;
-		res = new;
-		new = NULL;
+		new->open_context = get_nfs_open_context(ctx);
+		if (new->open_context) {
+			list_add_tail_rcu(&new->list,
+					&ctx->lock_context.list);
+			res = new;
+			new = NULL;
+		} else
+			res = ERR_PTR(-EBADF);
 	}
 	spin_unlock(&inode->i_lock);
 	kfree(new);
@@ -906,6 +910,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
 		return;
 	list_del_rcu(&l_ctx->list);
 	spin_unlock(&inode->i_lock);
+	put_nfs_open_context(ctx);
 	kfree_rcu(l_ctx, rcu_head);
 }
 EXPORT_SYMBOL_GPL(nfs_put_lock_context);
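
The inode.c pair closes a lifetime hole: a lock context now holds its own reference to the parent open context, taken with get_nfs_open_context() (which can fail once the last reference is already gone) and released in nfs_put_lock_context(). The underlying "try-get" idiom, sketched against a hypothetical refcounted object:

#include <linux/refcount.h>

struct my_obj { refcount_t count; };

/* Sketch: returns NULL instead of resurrecting a dying object. */
static struct my_obj *my_obj_tryget(struct my_obj *obj)
{
	if (!refcount_inc_not_zero(&obj->count))
		return NULL;
	return obj;
}
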
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 331a0504eaf8..498fab72f70b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -84,6 +84,7 @@ struct nfs_client_initdata {
 	u32 minorversion;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
+	const struct cred *cred;
 };
 
 /*
@@ -766,15 +767,10 @@ static inline bool nfs_error_is_fatal(int err)
 	case -ESTALE:
 	case -E2BIG:
 	case -ENOMEM:
+	case -ETIMEDOUT:
 		return true;
 	default:
 		return false;
 	}
 }
 
-static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
-{
-	ctx->error = error;
-	smp_wmb();
-	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
-}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d979ff4fee7e..cb7c10e9721e 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -163,6 +163,7 @@ int nfs_mount(struct nfs_mount_request *info)
 		.program	= &mnt_program,
 		.version	= info->version,
 		.authflavor	= RPC_AUTH_UNIX,
+		.cred		= current_cred(),
 	};
 	struct rpc_clnt *mnt_clnt;
 	int status;
@@ -249,6 +250,7 @@ void nfs_umount(const struct nfs_mount_request *info)
 		.version	= info->version,
 		.authflavor	= RPC_AUTH_UNIX,
 		.flags		= RPC_CLNT_CREATE_NOPING,
+		.cred		= current_cred(),
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= info->dirpath,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a7ed29de0a40..572794dab4b1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -76,6 +76,20 @@ static int nfs_stat_to_errno(enum nfs_stat);
  * or decoded inline.
  */
 
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
+{
+	if (clnt && clnt->cl_cred)
+		return clnt->cl_cred->user_ns;
+	return &init_user_ns;
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
+{
+	if (rqstp->rq_task)
+		return rpc_userns(rqstp->rq_task->tk_client);
+	return &init_user_ns;
+}
+
 /*
  * typedef opaque nfsdata<>;
  */
@@ -248,7 +262,8 @@ static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
  * };
  *
  */
-static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	u32 rdev, type;
 	__be32 *p;
@@ -263,10 +278,10 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 
 	fattr->mode = be32_to_cpup(p++);
 	fattr->nlink = be32_to_cpup(p++);
-	fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+	fattr->uid = make_kuid(userns, be32_to_cpup(p++));
 	if (!uid_valid(fattr->uid))
 		goto out_uid;
-	fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+	fattr->gid = make_kgid(userns, be32_to_cpup(p++));
 	if (!gid_valid(fattr->gid))
 		goto out_gid;
 
@@ -321,7 +336,8 @@ static __be32 *xdr_time_not_set(__be32 *p)
 	return p;
 }
 
-static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
+		struct user_namespace *userns)
 {
 	struct timespec ts;
 	__be32 *p;
@@ -333,11 +349,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	if (attr->ia_valid & ATTR_UID)
-		*p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+		*p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	if (attr->ia_valid & ATTR_GID)
-		*p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+		*p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	if (attr->ia_valid & ATTR_SIZE)
@@ -451,7 +467,8 @@ out_cheating:
  * };
  */
 static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
-			   __u32 *op_status)
+			   __u32 *op_status,
+			   struct user_namespace *userns)
 {
 	enum nfs_stat status;
 	int error;
@@ -463,7 +480,7 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
 	*op_status = status;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_fattr(xdr, result);
+	error = decode_fattr(xdr, result, userns);
 out:
 	return error;
 out_default:
@@ -498,19 +515,21 @@ static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
  *	void;
  * };
  */
-static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result,
+		struct user_namespace *userns)
 {
 	int error;
 
 	error = decode_fhandle(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_fattr(xdr, result->fattr);
+	error = decode_fattr(xdr, result->fattr, userns);
 out:
 	return error;
 }
 
-static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result,
+		struct user_namespace *userns)
 {
 	enum nfs_stat status;
 	int error;
@@ -520,7 +539,7 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_diropok(xdr, result);
+	error = decode_diropok(xdr, result, userns);
 out:
 	return error;
 out_default:
@@ -559,7 +578,7 @@ static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
 	const struct nfs_sattrargs *args = data;
 
 	encode_fhandle(xdr, args->fh);
-	encode_sattr(xdr, args->sattr);
+	encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
@@ -674,7 +693,7 @@ static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
 	const struct nfs_createargs *args = data;
 
 	encode_diropargs(xdr, args->fh, args->name, args->len);
-	encode_sattr(xdr, args->sattr);
+	encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
@@ -741,7 +760,7 @@ static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
 
 	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_path(xdr, args->pages, args->pathlen);
-	encode_sattr(xdr, args->sattr);
+	encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 /*
@@ -803,13 +822,13 @@ out_default:
 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *result)
 {
-	return decode_attrstat(xdr, result, NULL);
+	return decode_attrstat(xdr, result, NULL, rpc_rqst_userns(req));
 }
 
 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *result)
 {
-	return decode_diropres(xdr, result);
+	return decode_diropres(xdr, result, rpc_rqst_userns(req));
 }
 
 /*
@@ -864,7 +883,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 	result->op_status = status;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_fattr(xdr, result->fattr);
+	error = decode_fattr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	error = decode_nfsdata(xdr, result);
@@ -881,7 +900,8 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
-	return decode_attrstat(xdr, result->fattr, &result->op_status);
+	return decode_attrstat(xdr, result->fattr, &result->op_status,
+			rpc_rqst_userns(req));
 }
 
 /**
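
The two helpers at the top of nfs2xdr.c (duplicated in nfs3xdr.c below) pick the user namespace out of the cred now stored on the rpc_clnt, so every uid/gid that crosses the wire is translated relative to the mount's namespace instead of init_user_ns. The translation pair in isolation, as a self-contained sketch (my_encode_uid/my_decode_uid are hypothetical):

/* Sketch of the id mapping applied throughout these encoders/decoders. */
static __be32 my_encode_uid(struct user_namespace *userns, kuid_t uid)
{
	/* the _munged variant never fails: unmappable ids become the
	 * configured overflow uid */
	return cpu_to_be32(from_kuid_munged(userns, uid));
}

static kuid_t my_decode_uid(struct user_namespace *userns, __be32 wire)
{
	/* may produce an invalid kuid; callers must test uid_valid() */
	return make_kuid(userns, be32_to_cpu(wire));
}
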
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 7879f2a0fcfd..1afdb0f7473f 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -91,6 +91,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
91 .proto = ds_proto, 91 .proto = ds_proto,
92 .net = mds_clp->cl_net, 92 .net = mds_clp->cl_net,
93 .timeparms = &ds_timeout, 93 .timeparms = &ds_timeout,
94 .cred = mds_srv->cred,
94 }; 95 };
95 struct nfs_client *clp; 96 struct nfs_client *clp;
96 char buf[INET6_ADDRSTRLEN + 1]; 97 char buf[INET6_ADDRSTRLEN + 1];
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 110358f4986d..abbbdde97e31 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -104,6 +104,20 @@ static const umode_t nfs_type2fmt[] = {
104 [NF3FIFO] = S_IFIFO, 104 [NF3FIFO] = S_IFIFO,
105}; 105};
106 106
107static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
108{
109 if (clnt && clnt->cl_cred)
110 return clnt->cl_cred->user_ns;
111 return &init_user_ns;
112}
113
114static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
115{
116 if (rqstp->rq_task)
117 return rpc_userns(rqstp->rq_task->tk_client);
118 return &init_user_ns;
119}
120
107/* 121/*
108 * Encode/decode NFSv3 basic data types 122 * Encode/decode NFSv3 basic data types
109 * 123 *
@@ -516,7 +530,8 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
  *	set_mtime	mtime;
  * };
  */
-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
+		struct user_namespace *userns)
 {
 	struct timespec ts;
 	u32 nbytes;
@@ -551,13 +566,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
 
 	if (attr->ia_valid & ATTR_UID) {
 		*p++ = xdr_one;
-		*p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+		*p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
 	} else
 		*p++ = xdr_zero;
 
 	if (attr->ia_valid & ATTR_GID) {
 		*p++ = xdr_one;
-		*p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+		*p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
 	} else
 		*p++ = xdr_zero;
 
@@ -606,7 +621,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
  *	nfstime3	ctime;
  * };
  */
-static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	umode_t fmode;
 	__be32 *p;
@@ -619,10 +635,10 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 
 	fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
 	fattr->nlink = be32_to_cpup(p++);
-	fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+	fattr->uid = make_kuid(userns, be32_to_cpup(p++));
 	if (!uid_valid(fattr->uid))
 		goto out_uid;
-	fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+	fattr->gid = make_kgid(userns, be32_to_cpup(p++));
 	if (!gid_valid(fattr->gid))
 		goto out_gid;
 
@@ -659,7 +675,8 @@ out_gid:
  *	void;
  * };
  */
-static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	__be32 *p;
 
@@ -667,7 +684,7 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 	if (unlikely(!p))
 		return -EIO;
 	if (*p != xdr_zero)
-		return decode_fattr3(xdr, fattr);
+		return decode_fattr3(xdr, fattr, userns);
 	return 0;
 }
 
@@ -728,14 +745,15 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 	return 0;
 }
 
-static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	int error;
 
 	error = decode_pre_op_attr(xdr, fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, fattr);
+	error = decode_post_op_attr(xdr, fattr, userns);
 out:
 	return error;
 }
@@ -837,7 +855,7 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
 {
 	const struct nfs3_sattrargs *args = data;
 	encode_nfs_fh3(xdr, args->fh);
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
 	encode_sattrguard3(xdr, args);
 }
 
@@ -998,13 +1016,14 @@ static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
  * };
  */
 static void encode_createhow3(struct xdr_stream *xdr,
-			      const struct nfs3_createargs *args)
+			      const struct nfs3_createargs *args,
+			      struct user_namespace *userns)
 {
 	encode_uint32(xdr, args->createmode);
 	switch (args->createmode) {
 	case NFS3_CREATE_UNCHECKED:
 	case NFS3_CREATE_GUARDED:
-		encode_sattr3(xdr, args->sattr);
+		encode_sattr3(xdr, args->sattr, userns);
 		break;
 	case NFS3_CREATE_EXCLUSIVE:
 		encode_createverf3(xdr, args->verifier);
@@ -1021,7 +1040,7 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
 	const struct nfs3_createargs *args = data;
 
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
-	encode_createhow3(xdr, args);
+	encode_createhow3(xdr, args, rpc_rqst_userns(req));
 }
 
 /*
@@ -1039,7 +1058,7 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
 	const struct nfs3_mkdirargs *args = data;
 
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 /*
@@ -1056,11 +1075,12 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
  * };
  */
 static void encode_symlinkdata3(struct xdr_stream *xdr,
-				const void *data)
+				const void *data,
+				struct user_namespace *userns)
 {
 	const struct nfs3_symlinkargs *args = data;
 
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, userns);
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
@@ -1071,7 +1091,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
 	const struct nfs3_symlinkargs *args = data;
 
 	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
-	encode_symlinkdata3(xdr, args);
+	encode_symlinkdata3(xdr, args, rpc_rqst_userns(req));
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
 
@@ -1100,24 +1120,26 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
  * };
  */
 static void encode_devicedata3(struct xdr_stream *xdr,
-			       const struct nfs3_mknodargs *args)
+			       const struct nfs3_mknodargs *args,
+			       struct user_namespace *userns)
 {
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, userns);
 	encode_specdata3(xdr, args->rdev);
 }
 
 static void encode_mknoddata3(struct xdr_stream *xdr,
-			      const struct nfs3_mknodargs *args)
+			      const struct nfs3_mknodargs *args,
+			      struct user_namespace *userns)
 {
 	encode_ftype3(xdr, args->type);
 	switch (args->type) {
 	case NF3CHR:
 	case NF3BLK:
-		encode_devicedata3(xdr, args);
+		encode_devicedata3(xdr, args, userns);
 		break;
 	case NF3SOCK:
 	case NF3FIFO:
-		encode_sattr3(xdr, args->sattr);
+		encode_sattr3(xdr, args->sattr, userns);
 		break;
 	case NF3REG:
 	case NF3DIR:
@@ -1134,7 +1156,7 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
 	const struct nfs3_mknodargs *args = data;
 
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
-	encode_mknoddata3(xdr, args);
+	encode_mknoddata3(xdr, args, rpc_rqst_userns(req));
 }
 
 /*
@@ -1379,7 +1401,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_fattr3(xdr, result);
+	error = decode_fattr3(xdr, result, rpc_rqst_userns(req));
 out:
 	return error;
 out_default:
@@ -1414,7 +1436,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result);
+	error = decode_wcc_data(xdr, result, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1449,6 +1471,7 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
 				    void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1461,14 +1484,14 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 	error = decode_nfs_fh3(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, userns);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr, userns);
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr, userns);
 	if (unlikely(error))
 		goto out;
 	return nfs3_stat_to_errno(status);
@@ -1504,7 +1527,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1545,7 +1568,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result);
+	error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1623,7 +1646,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	result->op_status = status;
@@ -1694,7 +1717,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	result->op_status = status;
@@ -1728,14 +1751,15 @@ out_status:
  * };
  */
 static int decode_create3resok(struct xdr_stream *xdr,
-			       struct nfs3_diropres *result)
+			       struct nfs3_diropres *result,
+			       struct user_namespace *userns)
 {
 	int error;
 
 	error = decode_post_op_fh3(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, userns);
 	if (unlikely(error))
 		goto out;
 	/* The server isn't required to return a file handle.
@@ -1744,7 +1768,7 @@ static int decode_create3resok(struct xdr_stream *xdr,
 	 * values for the new object. */
 	if (result->fh->size == 0)
 		result->fattr->valid = 0;
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, userns);
 out:
 	return error;
 }
@@ -1753,6 +1777,7 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
 				   void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1762,11 +1787,11 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_create3resok(xdr, result);
+	error = decode_create3resok(xdr, result, userns);
 out:
 	return error;
 out_default:
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, userns);
 	if (unlikely(error))
 		goto out;
 	return nfs3_stat_to_errno(status);
@@ -1801,7 +1826,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1836,6 +1861,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
 				   void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs_renameres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1843,10 +1869,10 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->old_fattr);
+	error = decode_wcc_data(xdr, result->old_fattr, userns);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->new_fattr);
+	error = decode_wcc_data(xdr, result->new_fattr, userns);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1880,6 +1906,7 @@ out_status:
 static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs3_linkres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1887,10 +1914,10 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1887 error = decode_nfsstat3(xdr, &status); 1914 error = decode_nfsstat3(xdr, &status);
1888 if (unlikely(error)) 1915 if (unlikely(error))
1889 goto out; 1916 goto out;
1890 error = decode_post_op_attr(xdr, result->fattr); 1917 error = decode_post_op_attr(xdr, result->fattr, userns);
1891 if (unlikely(error)) 1918 if (unlikely(error))
1892 goto out; 1919 goto out;
1893 error = decode_wcc_data(xdr, result->dir_attr); 1920 error = decode_wcc_data(xdr, result->dir_attr, userns);
1894 if (unlikely(error)) 1921 if (unlikely(error))
1895 goto out; 1922 goto out;
1896 if (status != NFS3_OK) 1923 if (status != NFS3_OK)
@@ -1939,6 +1966,7 @@ out_status:
1939int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, 1966int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
1940 bool plus) 1967 bool plus)
1941{ 1968{
1969 struct user_namespace *userns = rpc_userns(entry->server->client);
1942 struct nfs_entry old = *entry; 1970 struct nfs_entry old = *entry;
1943 __be32 *p; 1971 __be32 *p;
1944 int error; 1972 int error;
@@ -1973,7 +2001,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
1973 2001
1974 if (plus) { 2002 if (plus) {
1975 entry->fattr->valid = 0; 2003 entry->fattr->valid = 0;
1976 error = decode_post_op_attr(xdr, entry->fattr); 2004 error = decode_post_op_attr(xdr, entry->fattr, userns);
1977 if (unlikely(error)) 2005 if (unlikely(error))
1978 return error; 2006 return error;
1979 if (entry->fattr->valid & NFS_ATTR_FATTR_V3) 2007 if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
@@ -2045,11 +2073,12 @@ static int decode_dirlist3(struct xdr_stream *xdr)
2045} 2073}
2046 2074
2047static int decode_readdir3resok(struct xdr_stream *xdr, 2075static int decode_readdir3resok(struct xdr_stream *xdr,
2048 struct nfs3_readdirres *result) 2076 struct nfs3_readdirres *result,
2077 struct user_namespace *userns)
2049{ 2078{
2050 int error; 2079 int error;
2051 2080
2052 error = decode_post_op_attr(xdr, result->dir_attr); 2081 error = decode_post_op_attr(xdr, result->dir_attr, userns);
2053 if (unlikely(error)) 2082 if (unlikely(error))
2054 goto out; 2083 goto out;
2055 /* XXX: do we need to check if result->verf != NULL ? */ 2084 /* XXX: do we need to check if result->verf != NULL ? */
@@ -2074,11 +2103,11 @@ static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
2074 goto out; 2103 goto out;
2075 if (status != NFS3_OK) 2104 if (status != NFS3_OK)
2076 goto out_default; 2105 goto out_default;
2077 error = decode_readdir3resok(xdr, result); 2106 error = decode_readdir3resok(xdr, result, rpc_rqst_userns(req));
2078out: 2107out:
2079 return error; 2108 return error;
2080out_default: 2109out_default:
2081 error = decode_post_op_attr(xdr, result->dir_attr); 2110 error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
2082 if (unlikely(error)) 2111 if (unlikely(error))
2083 goto out; 2112 goto out;
2084 return nfs3_stat_to_errno(status); 2113 return nfs3_stat_to_errno(status);
@@ -2138,7 +2167,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2138 error = decode_nfsstat3(xdr, &status); 2167 error = decode_nfsstat3(xdr, &status);
2139 if (unlikely(error)) 2168 if (unlikely(error))
2140 goto out; 2169 goto out;
2141 error = decode_post_op_attr(xdr, result->fattr); 2170 error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
2142 if (unlikely(error)) 2171 if (unlikely(error))
2143 goto out; 2172 goto out;
2144 if (status != NFS3_OK) 2173 if (status != NFS3_OK)
@@ -2212,7 +2241,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2212 error = decode_nfsstat3(xdr, &status); 2241 error = decode_nfsstat3(xdr, &status);
2213 if (unlikely(error)) 2242 if (unlikely(error))
2214 goto out; 2243 goto out;
2215 error = decode_post_op_attr(xdr, result->fattr); 2244 error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
2216 if (unlikely(error)) 2245 if (unlikely(error))
2217 goto out; 2246 goto out;
2218 if (status != NFS3_OK) 2247 if (status != NFS3_OK)
@@ -2273,7 +2302,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2273 error = decode_nfsstat3(xdr, &status); 2302 error = decode_nfsstat3(xdr, &status);
2274 if (unlikely(error)) 2303 if (unlikely(error))
2275 goto out; 2304 goto out;
2276 error = decode_post_op_attr(xdr, result->fattr); 2305 error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
2277 if (unlikely(error)) 2306 if (unlikely(error))
2278 goto out; 2307 goto out;
2279 if (status != NFS3_OK) 2308 if (status != NFS3_OK)
@@ -2315,7 +2344,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2315 error = decode_nfsstat3(xdr, &status); 2344 error = decode_nfsstat3(xdr, &status);
2316 if (unlikely(error)) 2345 if (unlikely(error))
2317 goto out; 2346 goto out;
2318 error = decode_wcc_data(xdr, result->fattr); 2347 error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
2319 if (unlikely(error)) 2348 if (unlikely(error))
2320 goto out; 2349 goto out;
2321 result->op_status = status; 2350 result->op_status = status;
@@ -2331,14 +2360,15 @@ out_status:
2331#ifdef CONFIG_NFS_V3_ACL 2360#ifdef CONFIG_NFS_V3_ACL
2332 2361
2333static inline int decode_getacl3resok(struct xdr_stream *xdr, 2362static inline int decode_getacl3resok(struct xdr_stream *xdr,
2334 struct nfs3_getaclres *result) 2363 struct nfs3_getaclres *result,
2364 struct user_namespace *userns)
2335{ 2365{
2336 struct posix_acl **acl; 2366 struct posix_acl **acl;
2337 unsigned int *aclcnt; 2367 unsigned int *aclcnt;
2338 size_t hdrlen; 2368 size_t hdrlen;
2339 int error; 2369 int error;
2340 2370
2341 error = decode_post_op_attr(xdr, result->fattr); 2371 error = decode_post_op_attr(xdr, result->fattr, userns);
2342 if (unlikely(error)) 2372 if (unlikely(error))
2343 goto out; 2373 goto out;
2344 error = decode_uint32(xdr, &result->mask); 2374 error = decode_uint32(xdr, &result->mask);
@@ -2386,7 +2416,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
2386 goto out; 2416 goto out;
2387 if (status != NFS3_OK) 2417 if (status != NFS3_OK)
2388 goto out_default; 2418 goto out_default;
2389 error = decode_getacl3resok(xdr, result); 2419 error = decode_getacl3resok(xdr, result, rpc_rqst_userns(req));
2390out: 2420out:
2391 return error; 2421 return error;
2392out_default: 2422out_default:
@@ -2405,7 +2435,7 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
2405 goto out; 2435 goto out;
2406 if (status != NFS3_OK) 2436 if (status != NFS3_OK)
2407 goto out_default; 2437 goto out_default;
2408 error = decode_post_op_attr(xdr, result); 2438 error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
2409out: 2439out:
2410 return error; 2440 return error;
2411out_default: 2441out_default:
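
The nfs3xdr.c hunks above are one mechanical change repeated: every decoder that can yield uid/gid-bearing attributes now threads through the user namespace of the mount's RPC client, fetched per request with rpc_rqst_userns(req) (or via the server's client for readdir entries). A minimal sketch of what the extra argument buys inside attribute decoding; the helper and type names come from the hunks above, but the body is simplified and is not the real decode_fattr3():

    /* Sketch, kernel context assumed: map an on-the-wire uid relative to
     * the mount creator's user namespace instead of init_user_ns. */
    static int decode_uid(struct xdr_stream *xdr, struct nfs_fattr *fattr,
                          struct user_namespace *userns)
    {
            __be32 *p = xdr_inline_decode(xdr, 4);

            if (unlikely(!p))
                    return -EIO;
            fattr->uid = make_kuid(userns, be32_to_cpup(p));
            return uid_valid(fattr->uid) ? 0 : -EINVAL;
    }

Without this, an NFS mount created inside a user namespace would interpret server uids relative to the initial namespace and report the wrong owners.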
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 06ac3d9ac7c6..8a38a254f516 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,6 +206,7 @@ struct nfs4_exception {
206 unsigned char delay : 1, 206 unsigned char delay : 1,
207 recovering : 1, 207 recovering : 1,
208 retry : 1; 208 retry : 1;
209 bool interruptible;
209}; 210};
210 211
211struct nfs4_state_recovery_ops { 212struct nfs4_state_recovery_ops {
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 1339ede979af..3ce246346f02 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -870,6 +870,7 @@ static int nfs4_set_client(struct nfs_server *server,
870 .minorversion = minorversion, 870 .minorversion = minorversion,
871 .net = net, 871 .net = net,
872 .timeparms = timeparms, 872 .timeparms = timeparms,
873 .cred = server->cred,
873 }; 874 };
874 struct nfs_client *clp; 875 struct nfs_client *clp;
875 876
@@ -931,6 +932,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
931 .minorversion = minor_version, 932 .minorversion = minor_version,
932 .net = mds_clp->cl_net, 933 .net = mds_clp->cl_net,
933 .timeparms = &ds_timeout, 934 .timeparms = &ds_timeout,
935 .cred = mds_srv->cred,
934 }; 936 };
935 char buf[INET6_ADDRSTRLEN + 1]; 937 char buf[INET6_ADDRSTRLEN + 1];
936 938
@@ -1107,6 +1109,8 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
1107 if (!server) 1109 if (!server)
1108 return ERR_PTR(-ENOMEM); 1110 return ERR_PTR(-ENOMEM);
1109 1111
1112 server->cred = get_cred(current_cred());
1113
1110 auth_probe = mount_info->parsed->auth_info.flavor_len < 1; 1114 auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
1111 1115
1112 /* set up the general RPC client */ 1116 /* set up the general RPC client */
@@ -1143,6 +1147,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1143 parent_server = NFS_SB(data->sb); 1147 parent_server = NFS_SB(data->sb);
1144 parent_client = parent_server->nfs_client; 1148 parent_client = parent_server->nfs_client;
1145 1149
1150 server->cred = get_cred(parent_server->cred);
1151
1146 /* Initialise the client representation from the parent server */ 1152 /* Initialise the client representation from the parent server */
1147 nfs_server_copy_userdata(server, parent_server); 1153 nfs_server_copy_userdata(server, parent_server);
1148 1154
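
Taken together, the nfs4client.c hunks pin the mounting user's credential on the nfs_server and hand it down to everything the server spawns: trunked clients through nfs4_set_client(), pNFS data servers through nfs4_set_ds_client(), and referral servers from their parent. A condensed sketch of the reference ownership, assuming the cred fields this series introduces:

    server->cred = get_cred(current_cred());   /* nfs4_create_server() */
    child->cred  = get_cred(parent->cred);     /* referral servers */
    cl_init.cred = server->cred;               /* borrowed by the client
                                                * setup args; the nfs_client
                                                * takes its own reference */

The credential matters because it carries the user namespace that the idmap code below uses for id translation.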
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 00d17198ee12..cf42a8b939e3 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -125,7 +125,7 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
125 return filemap_fdatawrite(file->f_mapping); 125 return filemap_fdatawrite(file->f_mapping);
126 126
127 /* Flush writes to the server and return any errors */ 127 /* Flush writes to the server and return any errors */
128 return vfs_fsync(file, 0); 128 return nfs_wb_all(inode);
129} 129}
130 130
131#ifdef CONFIG_NFS_V4_2 131#ifdef CONFIG_NFS_V4_2
@@ -187,7 +187,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
187 bool same_inode = false; 187 bool same_inode = false;
188 int ret; 188 int ret;
189 189
190 if (remap_flags & ~REMAP_FILE_ADVISORY) 190 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
191 return -EINVAL; 191 return -EINVAL;
192 192
193 /* check alignment w.r.t. clone_blksize */ 193 /* check alignment w.r.t. clone_blksize */
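
Two independent fixes here: nfs4_file_flush() now uses nfs_wb_all() so close-time flushes wait for all outstanding writeback and collect its errors, and nfs42_remap_file_range() stops rejecting REMAP_FILE_DEDUP, letting FIDEDUPERANGE reach the NFSv4.2 CLONE path. A hypothetical userspace probe for the latter (nothing below comes from the patch itself):

    #include <fcntl.h>
    #include <linux/fs.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    /* Dedupe len bytes of src into dst; on an NFSv4.2 mount this now
     * reaches ->remap_file_range() with REMAP_FILE_DEDUP set. */
    static int dedupe(int src, int dst, __u64 len)
    {
            struct file_dedupe_range *r =
                    calloc(1, sizeof(*r) + sizeof(struct file_dedupe_range_info));
            int ret;

            if (!r)
                    return -1;
            r->src_length = len;
            r->dest_count = 1;
            r->info[0].dest_fd = dst;
            ret = ioctl(src, FIDEDUPERANGE, r);
            free(r);
            return ret;
    }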
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index bf34ddaa2ad7..4884fdae28fb 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -69,8 +69,16 @@ struct idmap {
69 struct rpc_pipe *idmap_pipe; 69 struct rpc_pipe *idmap_pipe;
70 struct idmap_legacy_upcalldata *idmap_upcall_data; 70 struct idmap_legacy_upcalldata *idmap_upcall_data;
71 struct mutex idmap_mutex; 71 struct mutex idmap_mutex;
72 const struct cred *cred;
72}; 73};
73 74
75static struct user_namespace *idmap_userns(const struct idmap *idmap)
76{
77 if (idmap && idmap->cred)
78 return idmap->cred->user_ns;
79 return &init_user_ns;
80}
81
74/** 82/**
75 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 83 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
76 * @fattr: fully initialised struct nfs_fattr 84 * @fattr: fully initialised struct nfs_fattr
@@ -271,14 +279,15 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
271 const char *type, struct idmap *idmap) 279 const char *type, struct idmap *idmap)
272{ 280{
273 char *desc; 281 char *desc;
274 struct key *rkey; 282 struct key *rkey = ERR_PTR(-EAGAIN);
275 ssize_t ret; 283 ssize_t ret;
276 284
277 ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); 285 ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
278 if (ret < 0) 286 if (ret < 0)
279 return ERR_PTR(ret); 287 return ERR_PTR(ret);
280 288
281 rkey = request_key(&key_type_id_resolver, desc, ""); 289 if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
290 rkey = request_key(&key_type_id_resolver, desc, "");
282 if (IS_ERR(rkey)) { 291 if (IS_ERR(rkey)) {
283 mutex_lock(&idmap->idmap_mutex); 292 mutex_lock(&idmap->idmap_mutex);
284 rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, 293 rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
@@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
452 if (idmap == NULL) 461 if (idmap == NULL)
453 return -ENOMEM; 462 return -ENOMEM;
454 463
464 mutex_init(&idmap->idmap_mutex);
465 idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
466
455 rpc_init_pipe_dir_object(&idmap->idmap_pdo, 467 rpc_init_pipe_dir_object(&idmap->idmap_pdo,
456 &nfs_idmap_pipe_dir_object_ops, 468 &nfs_idmap_pipe_dir_object_ops,
457 idmap); 469 idmap);
@@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
462 goto err; 474 goto err;
463 } 475 }
464 idmap->idmap_pipe = pipe; 476 idmap->idmap_pipe = pipe;
465 mutex_init(&idmap->idmap_mutex);
466 477
467 error = rpc_add_pipe_dir_object(clp->cl_net, 478 error = rpc_add_pipe_dir_object(clp->cl_net,
468 &clp->cl_rpcclient->cl_pipedir_objects, 479 &clp->cl_rpcclient->cl_pipedir_objects,
@@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
475err_destroy_pipe: 486err_destroy_pipe:
476 rpc_destroy_pipe_data(idmap->idmap_pipe); 487 rpc_destroy_pipe_data(idmap->idmap_pipe);
477err: 488err:
489 put_cred(idmap->cred);
478 kfree(idmap); 490 kfree(idmap);
479 return error; 491 return error;
480} 492}
@@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
491 &clp->cl_rpcclient->cl_pipedir_objects, 503 &clp->cl_rpcclient->cl_pipedir_objects,
492 &idmap->idmap_pdo); 504 &idmap->idmap_pdo);
493 rpc_destroy_pipe_data(idmap->idmap_pipe); 505 rpc_destroy_pipe_data(idmap->idmap_pipe);
506 put_cred(idmap->cred);
494 kfree(idmap); 507 kfree(idmap);
495} 508}
496 509
@@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
735 if (!nfs_map_string_to_numeric(name, namelen, &id)) 748 if (!nfs_map_string_to_numeric(name, namelen, &id))
736 ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap); 749 ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
737 if (ret == 0) { 750 if (ret == 0) {
738 *uid = make_kuid(&init_user_ns, id); 751 *uid = make_kuid(idmap_userns(idmap), id);
739 if (!uid_valid(*uid)) 752 if (!uid_valid(*uid))
740 ret = -ERANGE; 753 ret = -ERANGE;
741 } 754 }
@@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size
752 if (!nfs_map_string_to_numeric(name, namelen, &id)) 765 if (!nfs_map_string_to_numeric(name, namelen, &id))
753 ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap); 766 ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
754 if (ret == 0) { 767 if (ret == 0) {
755 *gid = make_kgid(&init_user_ns, id); 768 *gid = make_kgid(idmap_userns(idmap), id);
756 if (!gid_valid(*gid)) 769 if (!gid_valid(*gid))
757 ret = -ERANGE; 770 ret = -ERANGE;
758 } 771 }
@@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf,
766 int ret = -EINVAL; 779 int ret = -EINVAL;
767 __u32 id; 780 __u32 id;
768 781
769 id = from_kuid(&init_user_ns, uid); 782 id = from_kuid_munged(idmap_userns(idmap), uid);
770 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 783 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
771 ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap); 784 ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
772 if (ret < 0) 785 if (ret < 0)
@@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf,
780 int ret = -EINVAL; 793 int ret = -EINVAL;
781 __u32 id; 794 __u32 id;
782 795
783 id = from_kgid(&init_user_ns, gid); 796 id = from_kgid_munged(idmap_userns(idmap), gid);
784 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 797 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
785 ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap); 798 ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
786 if (ret < 0) 799 if (ret < 0)
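
The idmapper now pins the credential of the RPC client that created it, derives the translation namespace from it via idmap_userns(), and only issues the generic request_key() upcall for mounts made in the initial namespace (keyring-based idmapping is not namespace-aware, so other mounts fall through to the legacy pipe upcall). The switch to the _munged converters keeps unmappable ids from erroring out. A worked illustration of the conversions, assuming a namespace ns that maps host uid 100000 to uid 0 with a range of 1; illustrative only:

    kuid_t host   = KUIDT_INIT(100000);
    uid_t  inside = from_kuid_munged(ns, host);        /* 0: mapped */
    kuid_t rt     = make_kuid(ns, inside);             /* round-trips */
    uid_t  miss   = from_kuid_munged(ns, KUIDT_INIT(5));
    /* uid 5 has no mapping in ns, so the munged variant returns the
     * overflow uid (normally 65534) instead of an invalid id. */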
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 741ff8c9c6ed..c29cbef6b53f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -400,17 +400,32 @@ static long nfs4_update_delay(long *timeout)
400 return ret; 400 return ret;
401} 401}
402 402
403static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) 403static int nfs4_delay_killable(long *timeout)
404{ 404{
405 int res = 0;
406
407 might_sleep(); 405 might_sleep();
408 406
409 freezable_schedule_timeout_killable_unsafe( 407 freezable_schedule_timeout_killable_unsafe(
410 nfs4_update_delay(timeout)); 408 nfs4_update_delay(timeout));
411 if (fatal_signal_pending(current)) 409 if (!__fatal_signal_pending(current))
412 res = -ERESTARTSYS; 410 return 0;
413 return res; 411 return -EINTR;
412}
413
414static int nfs4_delay_interruptible(long *timeout)
415{
416 might_sleep();
417
418 freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout));
419 if (!signal_pending(current))
420 return 0;
 421 return __fatal_signal_pending(current) ? -EINTR : -ERESTARTSYS;
422}
423
424static int nfs4_delay(long *timeout, bool interruptible)
425{
426 if (interruptible)
427 return nfs4_delay_interruptible(timeout);
428 return nfs4_delay_killable(timeout);
414} 429}
415 430
416/* This is the error handling routine for processes that are allowed 431/* This is the error handling routine for processes that are allowed
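
nfs4_delay() is split so callers can opt into a fully interruptible backoff: the killable variant returns -EINTR only on a fatal signal, while the interruptible variant also aborts on ordinary signals, returning -ERESTARTSYS so the syscall can be transparently restarted. The long run of nfs4proc.c hunks below then simply sets .interruptible on every exception used from a user-interruptible context. A sketch of such a retry loop; the operation name is hypothetical:

    struct nfs4_exception exception = {
            .interruptible = true,  /* Ctrl-C aborts the backoff sleep */
    };
    int err;

    do {
            err = _nfs4_do_something(server);       /* hypothetical op */
            err = nfs4_handle_exception(server, err, &exception);
    } while (exception.retry);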
@@ -546,7 +561,8 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
546 561
547 ret = nfs4_do_handle_exception(server, errorcode, exception); 562 ret = nfs4_do_handle_exception(server, errorcode, exception);
548 if (exception->delay) { 563 if (exception->delay) {
549 ret = nfs4_delay(server->client, &exception->timeout); 564 ret = nfs4_delay(&exception->timeout,
565 exception->interruptible);
550 goto out_retry; 566 goto out_retry;
551 } 567 }
552 if (exception->recovering) { 568 if (exception->recovering) {
@@ -978,10 +994,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
978 if (res->sr_slot != NULL) 994 if (res->sr_slot != NULL)
979 goto out_start; 995 goto out_start;
980 996
981 if (session) { 997 if (session)
982 tbl = &session->fc_slot_table; 998 tbl = &session->fc_slot_table;
983 task->tk_timeout = 0;
984 }
985 999
986 spin_lock(&tbl->slot_tbl_lock); 1000 spin_lock(&tbl->slot_tbl_lock);
987 /* The state manager will wait until the slot table is empty */ 1001 /* The state manager will wait until the slot table is empty */
@@ -990,9 +1004,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
990 1004
991 slot = nfs4_alloc_slot(tbl); 1005 slot = nfs4_alloc_slot(tbl);
992 if (IS_ERR(slot)) { 1006 if (IS_ERR(slot)) {
993 /* Try again in 1/4 second */
994 if (slot == ERR_PTR(-ENOMEM)) 1007 if (slot == ERR_PTR(-ENOMEM))
995 task->tk_timeout = HZ >> 2; 1008 goto out_sleep_timeout;
996 goto out_sleep; 1009 goto out_sleep;
997 } 1010 }
998 spin_unlock(&tbl->slot_tbl_lock); 1011 spin_unlock(&tbl->slot_tbl_lock);
@@ -1004,11 +1017,20 @@ out_start:
1004 nfs41_sequence_res_init(res); 1017 nfs41_sequence_res_init(res);
1005 rpc_call_start(task); 1018 rpc_call_start(task);
1006 return 0; 1019 return 0;
1007 1020out_sleep_timeout:
1021 /* Try again in 1/4 second */
1022 if (args->sa_privileged)
1023 rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task,
1024 jiffies + (HZ >> 2), RPC_PRIORITY_PRIVILEGED);
1025 else
1026 rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task,
1027 NULL, jiffies + (HZ >> 2));
1028 spin_unlock(&tbl->slot_tbl_lock);
1029 return -EAGAIN;
1008out_sleep: 1030out_sleep:
1009 if (args->sa_privileged) 1031 if (args->sa_privileged)
1010 rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, 1032 rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
1011 NULL, RPC_PRIORITY_PRIVILEGED); 1033 RPC_PRIORITY_PRIVILEGED);
1012 else 1034 else
1013 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 1035 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
1014 spin_unlock(&tbl->slot_tbl_lock); 1036 spin_unlock(&tbl->slot_tbl_lock);
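
Instead of priming task->tk_timeout before queueing, slot-table waiters now hand an absolute deadline straight to the timed sleep primitives this series adds to the RPC scheduler. A condensed form of the out_sleep_timeout path above:

    /* Sketch: retry slot allocation in ~250ms on ENOMEM; privileged
     * sequence ops queue at elevated priority so state recovery is not
     * starved behind ordinary I/O. */
    unsigned long deadline = jiffies + (HZ >> 2);

    if (args->sa_privileged)
            rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task,
                                          deadline, RPC_PRIORITY_PRIVILEGED);
    else
            rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task, NULL, deadline);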
@@ -3060,7 +3082,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
3060 int *opened) 3082 int *opened)
3061{ 3083{
3062 struct nfs_server *server = NFS_SERVER(dir); 3084 struct nfs_server *server = NFS_SERVER(dir);
3063 struct nfs4_exception exception = { }; 3085 struct nfs4_exception exception = {
3086 .interruptible = true,
3087 };
3064 struct nfs4_state *res; 3088 struct nfs4_state *res;
3065 struct nfs4_open_createattrs c = { 3089 struct nfs4_open_createattrs c = {
3066 .label = label, 3090 .label = label,
@@ -3673,7 +3697,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
3673 3697
3674int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 3698int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
3675{ 3699{
3676 struct nfs4_exception exception = { }; 3700 struct nfs4_exception exception = {
3701 .interruptible = true,
3702 };
3677 int err; 3703 int err;
3678 do { 3704 do {
3679 err = nfs4_handle_exception(server, 3705 err = nfs4_handle_exception(server,
@@ -3715,7 +3741,9 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
3715static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 3741static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
3716 struct nfs_fsinfo *info) 3742 struct nfs_fsinfo *info)
3717{ 3743{
3718 struct nfs4_exception exception = { }; 3744 struct nfs4_exception exception = {
3745 .interruptible = true,
3746 };
3719 int err; 3747 int err;
3720 do { 3748 do {
3721 err = _nfs4_lookup_root(server, fhandle, info); 3749 err = _nfs4_lookup_root(server, fhandle, info);
@@ -3942,7 +3970,9 @@ static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
3942 struct nfs_fattr *fattr, struct nfs4_label *label, 3970 struct nfs_fattr *fattr, struct nfs4_label *label,
3943 struct inode *inode) 3971 struct inode *inode)
3944{ 3972{
3945 struct nfs4_exception exception = { }; 3973 struct nfs4_exception exception = {
3974 .interruptible = true,
3975 };
3946 int err; 3976 int err;
3947 do { 3977 do {
3948 err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode); 3978 err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode);
@@ -4065,7 +4095,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
4065 const struct qstr *name, struct nfs_fh *fhandle, 4095 const struct qstr *name, struct nfs_fh *fhandle,
4066 struct nfs_fattr *fattr, struct nfs4_label *label) 4096 struct nfs_fattr *fattr, struct nfs4_label *label)
4067{ 4097{
4068 struct nfs4_exception exception = { }; 4098 struct nfs4_exception exception = {
4099 .interruptible = true,
4100 };
4069 struct rpc_clnt *client = *clnt; 4101 struct rpc_clnt *client = *clnt;
4070 int err; 4102 int err;
4071 do { 4103 do {
@@ -4169,7 +4201,9 @@ static int _nfs4_proc_lookupp(struct inode *inode,
4169static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, 4201static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
4170 struct nfs_fattr *fattr, struct nfs4_label *label) 4202 struct nfs_fattr *fattr, struct nfs4_label *label)
4171{ 4203{
4172 struct nfs4_exception exception = { }; 4204 struct nfs4_exception exception = {
4205 .interruptible = true,
4206 };
4173 int err; 4207 int err;
4174 do { 4208 do {
4175 err = _nfs4_proc_lookupp(inode, fhandle, fattr, label); 4209 err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
@@ -4216,7 +4250,9 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
4216 4250
4217static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) 4251static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
4218{ 4252{
4219 struct nfs4_exception exception = { }; 4253 struct nfs4_exception exception = {
4254 .interruptible = true,
4255 };
4220 int err; 4256 int err;
4221 do { 4257 do {
4222 err = _nfs4_proc_access(inode, entry); 4258 err = _nfs4_proc_access(inode, entry);
@@ -4271,7 +4307,9 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
4271static int nfs4_proc_readlink(struct inode *inode, struct page *page, 4307static int nfs4_proc_readlink(struct inode *inode, struct page *page,
4272 unsigned int pgbase, unsigned int pglen) 4308 unsigned int pgbase, unsigned int pglen)
4273{ 4309{
4274 struct nfs4_exception exception = { }; 4310 struct nfs4_exception exception = {
4311 .interruptible = true,
4312 };
4275 int err; 4313 int err;
4276 do { 4314 do {
4277 err = _nfs4_proc_readlink(inode, page, pgbase, pglen); 4315 err = _nfs4_proc_readlink(inode, page, pgbase, pglen);
@@ -4347,7 +4385,9 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
4347 4385
4348static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry) 4386static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
4349{ 4387{
4350 struct nfs4_exception exception = { }; 4388 struct nfs4_exception exception = {
4389 .interruptible = true,
4390 };
4351 struct inode *inode = d_inode(dentry); 4391 struct inode *inode = d_inode(dentry);
4352 int err; 4392 int err;
4353 4393
@@ -4368,7 +4408,9 @@ static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
4368 4408
4369static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) 4409static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
4370{ 4410{
4371 struct nfs4_exception exception = { }; 4411 struct nfs4_exception exception = {
4412 .interruptible = true,
4413 };
4372 int err; 4414 int err;
4373 4415
4374 do { 4416 do {
@@ -4527,7 +4569,9 @@ out:
4527 4569
4528static int nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) 4570static int nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
4529{ 4571{
4530 struct nfs4_exception exception = { }; 4572 struct nfs4_exception exception = {
4573 .interruptible = true,
4574 };
4531 int err; 4575 int err;
4532 do { 4576 do {
4533 err = nfs4_handle_exception(NFS_SERVER(inode), 4577 err = nfs4_handle_exception(NFS_SERVER(inode),
@@ -4634,7 +4678,9 @@ out:
4634static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, 4678static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
4635 struct page *page, unsigned int len, struct iattr *sattr) 4679 struct page *page, unsigned int len, struct iattr *sattr)
4636{ 4680{
4637 struct nfs4_exception exception = { }; 4681 struct nfs4_exception exception = {
4682 .interruptible = true,
4683 };
4638 struct nfs4_label l, *label = NULL; 4684 struct nfs4_label l, *label = NULL;
4639 int err; 4685 int err;
4640 4686
@@ -4673,7 +4719,9 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
4673 struct iattr *sattr) 4719 struct iattr *sattr)
4674{ 4720{
4675 struct nfs_server *server = NFS_SERVER(dir); 4721 struct nfs_server *server = NFS_SERVER(dir);
4676 struct nfs4_exception exception = { }; 4722 struct nfs4_exception exception = {
4723 .interruptible = true,
4724 };
4677 struct nfs4_label l, *label = NULL; 4725 struct nfs4_label l, *label = NULL;
4678 int err; 4726 int err;
4679 4727
@@ -4733,7 +4781,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
4733static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, 4781static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
4734 u64 cookie, struct page **pages, unsigned int count, bool plus) 4782 u64 cookie, struct page **pages, unsigned int count, bool plus)
4735{ 4783{
4736 struct nfs4_exception exception = { }; 4784 struct nfs4_exception exception = {
4785 .interruptible = true,
4786 };
4737 int err; 4787 int err;
4738 do { 4788 do {
4739 err = _nfs4_proc_readdir(dentry, cred, cookie, 4789 err = _nfs4_proc_readdir(dentry, cred, cookie,
@@ -4784,7 +4834,9 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
4784 struct iattr *sattr, dev_t rdev) 4834 struct iattr *sattr, dev_t rdev)
4785{ 4835{
4786 struct nfs_server *server = NFS_SERVER(dir); 4836 struct nfs_server *server = NFS_SERVER(dir);
4787 struct nfs4_exception exception = { }; 4837 struct nfs4_exception exception = {
4838 .interruptible = true,
4839 };
4788 struct nfs4_label l, *label = NULL; 4840 struct nfs4_label l, *label = NULL;
4789 int err; 4841 int err;
4790 4842
@@ -4826,7 +4878,9 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
4826 4878
4827static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) 4879static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
4828{ 4880{
4829 struct nfs4_exception exception = { }; 4881 struct nfs4_exception exception = {
4882 .interruptible = true,
4883 };
4830 int err; 4884 int err;
4831 do { 4885 do {
4832 err = nfs4_handle_exception(server, 4886 err = nfs4_handle_exception(server,
@@ -4857,7 +4911,9 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
4857 4911
4858static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 4912static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
4859{ 4913{
4860 struct nfs4_exception exception = { }; 4914 struct nfs4_exception exception = {
4915 .interruptible = true,
4916 };
4861 unsigned long now = jiffies; 4917 unsigned long now = jiffies;
4862 int err; 4918 int err;
4863 4919
@@ -4919,7 +4975,9 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
4919static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 4975static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
4920 struct nfs_pathconf *pathconf) 4976 struct nfs_pathconf *pathconf)
4921{ 4977{
4922 struct nfs4_exception exception = { }; 4978 struct nfs4_exception exception = {
4979 .interruptible = true,
4980 };
4923 int err; 4981 int err;
4924 4982
4925 do { 4983 do {
@@ -5488,7 +5546,9 @@ out_free:
5488 5546
5489static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) 5547static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
5490{ 5548{
5491 struct nfs4_exception exception = { }; 5549 struct nfs4_exception exception = {
5550 .interruptible = true,
5551 };
5492 ssize_t ret; 5552 ssize_t ret;
5493 do { 5553 do {
5494 ret = __nfs4_get_acl_uncached(inode, buf, buflen); 5554 ret = __nfs4_get_acl_uncached(inode, buf, buflen);
@@ -5622,7 +5682,9 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
5622static int nfs4_get_security_label(struct inode *inode, void *buf, 5682static int nfs4_get_security_label(struct inode *inode, void *buf,
5623 size_t buflen) 5683 size_t buflen)
5624{ 5684{
5625 struct nfs4_exception exception = { }; 5685 struct nfs4_exception exception = {
5686 .interruptible = true,
5687 };
5626 int err; 5688 int err;
5627 5689
5628 if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) 5690 if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@@ -6263,7 +6325,9 @@ out:
6263 6325
6264static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) 6326static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
6265{ 6327{
6266 struct nfs4_exception exception = { }; 6328 struct nfs4_exception exception = {
6329 .interruptible = true,
6330 };
6267 int err; 6331 int err;
6268 6332
6269 do { 6333 do {
@@ -6827,6 +6891,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
6827 struct nfs4_exception exception = { 6891 struct nfs4_exception exception = {
6828 .state = state, 6892 .state = state,
6829 .inode = state->inode, 6893 .inode = state->inode,
6894 .interruptible = true,
6830 }; 6895 };
6831 int err; 6896 int err;
6832 6897
@@ -7240,7 +7305,9 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
7240 struct nfs4_fs_locations *fs_locations, 7305 struct nfs4_fs_locations *fs_locations,
7241 struct page *page) 7306 struct page *page)
7242{ 7307{
7243 struct nfs4_exception exception = { }; 7308 struct nfs4_exception exception = {
7309 .interruptible = true,
7310 };
7244 int err; 7311 int err;
7245 do { 7312 do {
7246 err = _nfs4_proc_fs_locations(client, dir, name, 7313 err = _nfs4_proc_fs_locations(client, dir, name,
@@ -7383,7 +7450,9 @@ int nfs4_proc_get_locations(struct inode *inode,
7383 struct nfs_client *clp = server->nfs_client; 7450 struct nfs_client *clp = server->nfs_client;
7384 const struct nfs4_mig_recovery_ops *ops = 7451 const struct nfs4_mig_recovery_ops *ops =
7385 clp->cl_mvops->mig_recovery_ops; 7452 clp->cl_mvops->mig_recovery_ops;
7386 struct nfs4_exception exception = { }; 7453 struct nfs4_exception exception = {
7454 .interruptible = true,
7455 };
7387 int status; 7456 int status;
7388 7457
7389 dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, 7458 dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@@ -7507,7 +7576,9 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
7507 struct nfs_client *clp = server->nfs_client; 7576 struct nfs_client *clp = server->nfs_client;
7508 const struct nfs4_mig_recovery_ops *ops = 7577 const struct nfs4_mig_recovery_ops *ops =
7509 clp->cl_mvops->mig_recovery_ops; 7578 clp->cl_mvops->mig_recovery_ops;
7510 struct nfs4_exception exception = { }; 7579 struct nfs4_exception exception = {
7580 .interruptible = true,
7581 };
7511 int status; 7582 int status;
7512 7583
7513 dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, 7584 dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@@ -7573,7 +7644,9 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
7573int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, 7644int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
7574 struct nfs4_secinfo_flavors *flavors) 7645 struct nfs4_secinfo_flavors *flavors)
7575{ 7646{
7576 struct nfs4_exception exception = { }; 7647 struct nfs4_exception exception = {
7648 .interruptible = true,
7649 };
7577 int err; 7650 int err;
7578 do { 7651 do {
7579 err = -NFS4ERR_WRONGSEC; 7652 err = -NFS4ERR_WRONGSEC;
@@ -9263,7 +9336,9 @@ static int
9263nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, 9336nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
9264 struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) 9337 struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
9265{ 9338{
9266 struct nfs4_exception exception = { }; 9339 struct nfs4_exception exception = {
9340 .interruptible = true,
9341 };
9267 int err; 9342 int err;
9268 do { 9343 do {
9269 /* first try using integrity protection */ 9344 /* first try using integrity protection */
@@ -9430,7 +9505,9 @@ static int nfs41_test_stateid(struct nfs_server *server,
9430 nfs4_stateid *stateid, 9505 nfs4_stateid *stateid,
9431 const struct cred *cred) 9506 const struct cred *cred)
9432{ 9507{
9433 struct nfs4_exception exception = { }; 9508 struct nfs4_exception exception = {
9509 .interruptible = true,
9510 };
9434 int err; 9511 int err;
9435 do { 9512 do {
9436 err = _nfs41_test_stateid(server, stateid, cred); 9513 err = _nfs41_test_stateid(server, stateid, cred);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3de36479ed7a..e2e3c4f04d3e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -159,6 +159,10 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
159 /* Sustain the lease, even if it's empty. If the clientid4 159 /* Sustain the lease, even if it's empty. If the clientid4
160 * goes stale it's of no use for trunking discovery. */ 160 * goes stale it's of no use for trunking discovery. */
161 nfs4_schedule_state_renewal(*result); 161 nfs4_schedule_state_renewal(*result);
162
 163 /* If the client state needs to recover, do it. */
164 if (clp->cl_state)
165 nfs4_schedule_state_manager(clp);
162 } 166 }
163out: 167out:
164 return status; 168 return status;
@@ -2346,8 +2350,7 @@ static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
2346{ 2350{
2347 /* FIXME: For now, we destroy all layouts. */ 2351 /* FIXME: For now, we destroy all layouts. */
2348 pnfs_destroy_all_layouts(clp); 2352 pnfs_destroy_all_layouts(clp);
2349 /* FIXME: For now, we test all delegations+open state+locks. */ 2353 nfs_test_expired_all_delegations(clp);
2350 nfs41_handle_some_state_revoked(clp);
2351 dprintk("%s: Recallable state revoked on server %s!\n", __func__, 2354 dprintk("%s: Recallable state revoked on server %s!\n", __func__,
2352 clp->cl_hostname); 2355 clp->cl_hostname);
2353} 2356}
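
Two state-manager tweaks: nfs40_discover_server_trunking() now kicks the state manager if recovery flags accumulated while the lease was being confirmed, and the recallable-state-revoked callback probes which delegations actually expired rather than pessimistically revoking all open and lock state. Roughly, with names taken from the hunks above:

    if (clp->cl_state)                      /* any pending recovery bit */
            nfs4_schedule_state_manager(clp);

    /* old: nfs41_handle_some_state_revoked(clp)   - recover everything
     * new: nfs_test_expired_all_delegations(clp)  - TEST_STATEID probe,
     *      reclaiming only what the server really revoked */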
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e9f39fa5964b..6ec30014a439 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -16,8 +16,8 @@
16#include <linux/nfs.h> 16#include <linux/nfs.h>
17#include <linux/nfs3.h> 17#include <linux/nfs3.h>
18#include <linux/nfs4.h> 18#include <linux/nfs4.h>
19#include <linux/nfs_page.h>
20#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h>
21#include <linux/nfs_mount.h> 21#include <linux/nfs_mount.h>
22#include <linux/export.h> 22#include <linux/export.h>
23 23
@@ -47,7 +47,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
47 47
48 hdr->req = nfs_list_entry(mirror->pg_list.next); 48 hdr->req = nfs_list_entry(mirror->pg_list.next);
49 hdr->inode = desc->pg_inode; 49 hdr->inode = desc->pg_inode;
50 hdr->cred = hdr->req->wb_context->cred; 50 hdr->cred = nfs_req_openctx(hdr->req)->cred;
51 hdr->io_start = req_offset(hdr->req); 51 hdr->io_start = req_offset(hdr->req);
52 hdr->good_bytes = mirror->pg_count; 52 hdr->good_bytes = mirror->pg_count;
53 hdr->io_completion = desc->pg_io_completion; 53 hdr->io_completion = desc->pg_io_completion;
@@ -295,25 +295,13 @@ out:
295 nfs_release_request(head); 295 nfs_release_request(head);
296} 296}
297 297
298/** 298static struct nfs_page *
299 * nfs_create_request - Create an NFS read/write request. 299__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
300 * @ctx: open context to use 300 unsigned int pgbase, unsigned int offset,
301 * @page: page to write
302 * @last: last nfs request created for this page group or NULL if head
303 * @offset: starting offset within the page for the write
304 * @count: number of bytes to read/write
305 *
306 * The page must be locked by the caller. This makes sure we never
307 * create two different requests for the same page.
308 * User should ensure it is safe to sleep in this function.
309 */
310struct nfs_page *
311nfs_create_request(struct nfs_open_context *ctx, struct page *page,
312 struct nfs_page *last, unsigned int offset,
313 unsigned int count) 301 unsigned int count)
314{ 302{
315 struct nfs_page *req; 303 struct nfs_page *req;
316 struct nfs_lock_context *l_ctx; 304 struct nfs_open_context *ctx = l_ctx->open_context;
317 305
318 if (test_bit(NFS_CONTEXT_BAD, &ctx->flags)) 306 if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
319 return ERR_PTR(-EBADF); 307 return ERR_PTR(-EBADF);
@@ -322,13 +310,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
322 if (req == NULL) 310 if (req == NULL)
323 return ERR_PTR(-ENOMEM); 311 return ERR_PTR(-ENOMEM);
324 312
325 /* get lock context early so we can deal with alloc failures */
326 l_ctx = nfs_get_lock_context(ctx);
327 if (IS_ERR(l_ctx)) {
328 nfs_page_free(req);
329 return ERR_CAST(l_ctx);
330 }
331 req->wb_lock_context = l_ctx; 313 req->wb_lock_context = l_ctx;
314 refcount_inc(&l_ctx->count);
332 atomic_inc(&l_ctx->io_count); 315 atomic_inc(&l_ctx->io_count);
333 316
334 /* Initialize the request struct. Initially, we assume a 317 /* Initialize the request struct. Initially, we assume a
@@ -340,15 +323,59 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
340 get_page(page); 323 get_page(page);
341 } 324 }
342 req->wb_offset = offset; 325 req->wb_offset = offset;
343 req->wb_pgbase = offset; 326 req->wb_pgbase = pgbase;
344 req->wb_bytes = count; 327 req->wb_bytes = count;
345 req->wb_context = get_nfs_open_context(ctx);
346 kref_init(&req->wb_kref); 328 kref_init(&req->wb_kref);
347 nfs_page_group_init(req, last); 329 req->wb_nio = 0;
348 return req; 330 return req;
349} 331}
350 332
351/** 333/**
334 * nfs_create_request - Create an NFS read/write request.
335 * @ctx: open context to use
336 * @page: page to write
337 * @offset: starting offset within the page for the write
338 * @count: number of bytes to read/write
339 *
340 * The page must be locked by the caller. This makes sure we never
341 * create two different requests for the same page.
342 * User should ensure it is safe to sleep in this function.
343 */
344struct nfs_page *
345nfs_create_request(struct nfs_open_context *ctx, struct page *page,
346 unsigned int offset, unsigned int count)
347{
348 struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
349 struct nfs_page *ret;
350
351 if (IS_ERR(l_ctx))
352 return ERR_CAST(l_ctx);
353 ret = __nfs_create_request(l_ctx, page, offset, offset, count);
354 if (!IS_ERR(ret))
355 nfs_page_group_init(ret, NULL);
356 nfs_put_lock_context(l_ctx);
357 return ret;
358}
359
360static struct nfs_page *
361nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
362 unsigned int pgbase, unsigned int offset,
363 unsigned int count)
364{
365 struct nfs_page *ret;
366
367 ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
368 pgbase, offset, count);
369 if (!IS_ERR(ret)) {
370 nfs_lock_request(ret);
371 ret->wb_index = req->wb_index;
372 nfs_page_group_init(ret, last);
373 ret->wb_nio = req->wb_nio;
374 }
375 return ret;
376}
377
378/**
352 * nfs_unlock_request - Unlock request and wake up sleepers. 379 * nfs_unlock_request - Unlock request and wake up sleepers.
353 * @req: pointer to request 380 * @req: pointer to request
354 */ 381 */
@@ -386,8 +413,8 @@ void nfs_unlock_and_release_request(struct nfs_page *req)
386static void nfs_clear_request(struct nfs_page *req) 413static void nfs_clear_request(struct nfs_page *req)
387{ 414{
388 struct page *page = req->wb_page; 415 struct page *page = req->wb_page;
389 struct nfs_open_context *ctx = req->wb_context;
390 struct nfs_lock_context *l_ctx = req->wb_lock_context; 416 struct nfs_lock_context *l_ctx = req->wb_lock_context;
417 struct nfs_open_context *ctx;
391 418
392 if (page != NULL) { 419 if (page != NULL) {
393 put_page(page); 420 put_page(page);
@@ -396,16 +423,13 @@ static void nfs_clear_request(struct nfs_page *req)
396 if (l_ctx != NULL) { 423 if (l_ctx != NULL) {
397 if (atomic_dec_and_test(&l_ctx->io_count)) { 424 if (atomic_dec_and_test(&l_ctx->io_count)) {
398 wake_up_var(&l_ctx->io_count); 425 wake_up_var(&l_ctx->io_count);
426 ctx = l_ctx->open_context;
399 if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags)) 427 if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
400 rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq); 428 rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
401 } 429 }
402 nfs_put_lock_context(l_ctx); 430 nfs_put_lock_context(l_ctx);
403 req->wb_lock_context = NULL; 431 req->wb_lock_context = NULL;
404 } 432 }
405 if (ctx != NULL) {
406 put_nfs_open_context(ctx);
407 req->wb_context = NULL;
408 }
409} 433}
410 434
411/** 435/**
@@ -550,7 +574,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
550 hdr->args.pgbase = req->wb_pgbase; 574 hdr->args.pgbase = req->wb_pgbase;
551 hdr->args.pages = hdr->page_array.pagevec; 575 hdr->args.pages = hdr->page_array.pagevec;
552 hdr->args.count = count; 576 hdr->args.count = count;
553 hdr->args.context = get_nfs_open_context(req->wb_context); 577 hdr->args.context = get_nfs_open_context(nfs_req_openctx(req));
554 hdr->args.lock_context = req->wb_lock_context; 578 hdr->args.lock_context = req->wb_lock_context;
555 hdr->args.stable = NFS_UNSTABLE; 579 hdr->args.stable = NFS_UNSTABLE;
556 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 580 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
@@ -698,6 +722,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
698 desc->pg_mirrors_dynamic = NULL; 722 desc->pg_mirrors_dynamic = NULL;
699 desc->pg_mirrors = desc->pg_mirrors_static; 723 desc->pg_mirrors = desc->pg_mirrors_static;
700 nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); 724 nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
725 desc->pg_maxretrans = 0;
701} 726}
702 727
703/** 728/**
@@ -906,9 +931,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
906 struct file_lock_context *flctx; 931 struct file_lock_context *flctx;
907 932
908 if (prev) { 933 if (prev) {
909 if (!nfs_match_open_context(req->wb_context, prev->wb_context)) 934 if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
910 return false; 935 return false;
911 flctx = d_inode(req->wb_context->dentry)->i_flctx; 936 flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
912 if (flctx != NULL && 937 if (flctx != NULL &&
913 !(list_empty_careful(&flctx->flc_posix) && 938 !(list_empty_careful(&flctx->flc_posix) &&
914 list_empty_careful(&flctx->flc_flock)) && 939 list_empty_careful(&flctx->flc_flock)) &&
@@ -957,6 +982,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
957 return 0; 982 return 0;
958 mirror->pg_base = req->wb_pgbase; 983 mirror->pg_base = req->wb_pgbase;
959 } 984 }
985
986 if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
987 if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
988 desc->pg_error = -ETIMEDOUT;
989 else
990 desc->pg_error = -EIO;
991 return 0;
992 }
993
960 if (!nfs_can_coalesce_requests(prev, req, desc)) 994 if (!nfs_can_coalesce_requests(prev, req, desc))
961 return 0; 995 return 0;
962 nfs_list_move_request(req, &mirror->pg_list); 996 nfs_list_move_request(req, &mirror->pg_list);
@@ -1049,14 +1083,10 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1049 pgbase += subreq->wb_bytes; 1083 pgbase += subreq->wb_bytes;
1050 1084
1051 if (bytes_left) { 1085 if (bytes_left) {
1052 subreq = nfs_create_request(req->wb_context, 1086 subreq = nfs_create_subreq(req, subreq, pgbase,
1053 req->wb_page, 1087 offset, bytes_left);
1054 subreq, pgbase, bytes_left);
1055 if (IS_ERR(subreq)) 1088 if (IS_ERR(subreq))
1056 goto err_ptr; 1089 goto err_ptr;
1057 nfs_lock_request(subreq);
1058 subreq->wb_offset = offset;
1059 subreq->wb_index = req->wb_index;
1060 } 1090 }
1061 } while (bytes_left > 0); 1091 } while (bytes_left > 0);
1062 1092
@@ -1158,19 +1188,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1158 lastreq = lastreq->wb_this_page) 1188 lastreq = lastreq->wb_this_page)
1159 ; 1189 ;
1160 1190
1161 dupreq = nfs_create_request(req->wb_context, 1191 dupreq = nfs_create_subreq(req, lastreq,
1162 req->wb_page, lastreq, pgbase, bytes); 1192 pgbase, offset, bytes);
1163 1193
1194 nfs_page_group_unlock(req);
1164 if (IS_ERR(dupreq)) { 1195 if (IS_ERR(dupreq)) {
1165 nfs_page_group_unlock(req);
1166 desc->pg_error = PTR_ERR(dupreq); 1196 desc->pg_error = PTR_ERR(dupreq);
1167 goto out_failed; 1197 goto out_failed;
1168 } 1198 }
1169
1170 nfs_lock_request(dupreq);
1171 nfs_page_group_unlock(req);
1172 dupreq->wb_offset = offset;
1173 dupreq->wb_index = req->wb_index;
1174 } else 1199 } else
1175 dupreq = req; 1200 dupreq = req;
1176 1201
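
The pagelist.c refactor has three strands: the open context is no longer stored on each nfs_page (it is reached through the lock context, which the request now explicitly pins with refcount_inc()); sub-requests are built by a dedicated nfs_create_subreq() that inherits the page index, lock context, group linkage and the new wb_nio retry counter; and the pageio descriptor gains pg_maxretrans so endlessly re-queued requests eventually fail with ETIMEDOUT (softerr) or EIO instead of looping forever. The accessor that replaces req->wb_context everywhere is a one-liner along these lines, added to include/linux/nfs_page.h by this series:

    static inline struct nfs_open_context *
    nfs_req_openctx(struct nfs_page *req)
    {
            return req->wb_lock_context->open_context;
    }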
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7066cd7c7aff..83722e936b4a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2436,7 +2436,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
2436 rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); 2436 rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
2437 2437
2438 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 2438 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
2439 req->wb_context, 2439 nfs_req_openctx(req),
2440 req_offset(req), 2440 req_offset(req),
2441 rd_size, 2441 rd_size,
2442 IOMODE_READ, 2442 IOMODE_READ,
@@ -2463,7 +2463,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
2463 pnfs_generic_pg_check_range(pgio, req); 2463 pnfs_generic_pg_check_range(pgio, req);
2464 if (pgio->pg_lseg == NULL) { 2464 if (pgio->pg_lseg == NULL) {
2465 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 2465 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
2466 req->wb_context, 2466 nfs_req_openctx(req),
2467 req_offset(req), 2467 req_offset(req),
2468 wb_size, 2468 wb_size,
2469 IOMODE_RW, 2469 IOMODE_RW,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c0420b979d88..f15609c003d8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -459,7 +459,7 @@ static inline bool
459pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, 459pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
460 struct nfs_commit_info *cinfo, u32 ds_commit_idx) 460 struct nfs_commit_info *cinfo, u32 ds_commit_idx)
461{ 461{
462 struct inode *inode = d_inode(req->wb_context->dentry); 462 struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
463 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 463 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
464 464
465 if (lseg == NULL || ld->mark_request_commit == NULL) 465 if (lseg == NULL || ld->mark_request_commit == NULL)
@@ -471,7 +471,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
471static inline bool 471static inline bool
472pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) 472pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
473{ 473{
474 struct inode *inode = d_inode(req->wb_context->dentry); 474 struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
475 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 475 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
476 476
477 if (ld == NULL || ld->clear_request_commit == NULL) 477 if (ld == NULL || ld->clear_request_commit == NULL)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 1d95a60b2586..c799e540ed1e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
92 92
93static void nfs_readpage_release(struct nfs_page *req) 93static void nfs_readpage_release(struct nfs_page *req)
94{ 94{
95 struct inode *inode = d_inode(req->wb_context->dentry); 95 struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
96 96
97 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, 97 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
98 (unsigned long long)NFS_FILEID(inode), req->wb_bytes, 98 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
@@ -118,7 +118,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
118 len = nfs_page_length(page); 118 len = nfs_page_length(page);
119 if (len == 0) 119 if (len == 0)
120 return nfs_return_empty_page(page); 120 return nfs_return_empty_page(page);
121 new = nfs_create_request(ctx, page, NULL, 0, len); 121 new = nfs_create_request(ctx, page, 0, len);
122 if (IS_ERR(new)) { 122 if (IS_ERR(new)) {
123 unlock_page(page); 123 unlock_page(page);
124 return PTR_ERR(new); 124 return PTR_ERR(new);
@@ -363,7 +363,7 @@ readpage_async_filler(void *data, struct page *page)
363 if (len == 0) 363 if (len == 0)
364 return nfs_return_empty_page(page); 364 return nfs_return_empty_page(page);
365 365
366 new = nfs_create_request(desc->ctx, page, NULL, 0, len); 366 new = nfs_create_request(desc->ctx, page, 0, len);
367 if (IS_ERR(new)) 367 if (IS_ERR(new))
368 goto out_error; 368 goto out_error;
369 369
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 450ae77d19bf..d6c687419a81 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -78,7 +78,7 @@
78 78
79enum { 79enum {
80 /* Mount options that take no arguments */ 80 /* Mount options that take no arguments */
81 Opt_soft, Opt_hard, 81 Opt_soft, Opt_softerr, Opt_hard,
82 Opt_posix, Opt_noposix, 82 Opt_posix, Opt_noposix,
83 Opt_cto, Opt_nocto, 83 Opt_cto, Opt_nocto,
84 Opt_ac, Opt_noac, 84 Opt_ac, Opt_noac,
@@ -125,6 +125,7 @@ static const match_table_t nfs_mount_option_tokens = {
125 { Opt_sloppy, "sloppy" }, 125 { Opt_sloppy, "sloppy" },
126 126
127 { Opt_soft, "soft" }, 127 { Opt_soft, "soft" },
128 { Opt_softerr, "softerr" },
128 { Opt_hard, "hard" }, 129 { Opt_hard, "hard" },
129 { Opt_deprecated, "intr" }, 130 { Opt_deprecated, "intr" },
130 { Opt_deprecated, "nointr" }, 131 { Opt_deprecated, "nointr" },
@@ -628,7 +629,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
628 const char *str; 629 const char *str;
629 const char *nostr; 630 const char *nostr;
630 } nfs_info[] = { 631 } nfs_info[] = {
631 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 632 { NFS_MOUNT_SOFT, ",soft", "" },
633 { NFS_MOUNT_SOFTERR, ",softerr", "" },
632 { NFS_MOUNT_POSIX, ",posix", "" }, 634 { NFS_MOUNT_POSIX, ",posix", "" },
633 { NFS_MOUNT_NOCTO, ",nocto", "" }, 635 { NFS_MOUNT_NOCTO, ",nocto", "" },
634 { NFS_MOUNT_NOAC, ",noac", "" }, 636 { NFS_MOUNT_NOAC, ",noac", "" },
@@ -658,6 +660,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
658 seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); 660 seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
659 if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults) 661 if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)
660 seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); 662 seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
663 if (!(nfss->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)))
664 seq_puts(m, ",hard");
661 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { 665 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
662 if (nfss->flags & nfs_infop->flag) 666 if (nfss->flags & nfs_infop->flag)
663 seq_puts(m, nfs_infop->str); 667 seq_puts(m, nfs_infop->str);
@@ -1239,10 +1243,15 @@ static int nfs_parse_mount_options(char *raw,
1239 */ 1243 */
1240 case Opt_soft: 1244 case Opt_soft:
1241 mnt->flags |= NFS_MOUNT_SOFT; 1245 mnt->flags |= NFS_MOUNT_SOFT;
1246 mnt->flags &= ~NFS_MOUNT_SOFTERR;
1242 break; 1247 break;
1243 case Opt_hard: 1248 case Opt_softerr:
1249 mnt->flags |= NFS_MOUNT_SOFTERR;
1244 mnt->flags &= ~NFS_MOUNT_SOFT; 1250 mnt->flags &= ~NFS_MOUNT_SOFT;
1245 break; 1251 break;
1252 case Opt_hard:
1253 mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
1254 break;
1246 case Opt_posix: 1255 case Opt_posix:
1247 mnt->flags |= NFS_MOUNT_POSIX; 1256 mnt->flags |= NFS_MOUNT_POSIX;
1248 break; 1257 break;
@@ -2476,6 +2485,21 @@ static int nfs_compare_super_address(struct nfs_server *server1,
2476 return 1; 2485 return 1;
2477} 2486}
2478 2487
2488static int nfs_compare_userns(const struct nfs_server *old,
2489 const struct nfs_server *new)
2490{
2491 const struct user_namespace *oldns = &init_user_ns;
2492 const struct user_namespace *newns = &init_user_ns;
2493
2494 if (old->client && old->client->cl_cred)
2495 oldns = old->client->cl_cred->user_ns;
2496 if (new->client && new->client->cl_cred)
2497 newns = new->client->cl_cred->user_ns;
2498 if (oldns != newns)
2499 return 0;
2500 return 1;
2501}
2502
2479static int nfs_compare_super(struct super_block *sb, void *data) 2503static int nfs_compare_super(struct super_block *sb, void *data)
2480{ 2504{
2481 struct nfs_sb_mountdata *sb_mntdata = data; 2505 struct nfs_sb_mountdata *sb_mntdata = data;
@@ -2489,6 +2513,8 @@ static int nfs_compare_super(struct super_block *sb, void *data)
2489 return 0; 2513 return 0;
2490 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) 2514 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
2491 return 0; 2515 return 0;
2516 if (!nfs_compare_userns(old, server))
2517 return 0;
2492 return nfs_compare_mount_options(sb, server, mntflags); 2518 return nfs_compare_mount_options(sb, server, mntflags);
2493} 2519}
2494 2520
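
The retry options now form a tri-state: hard (neither flag set, retry forever), soft (fail with EIO after a major timeout) and the new softerr (fail with ETIMEDOUT, which applications can distinguish from a real I/O error). The parser keeps the three mutually exclusive, and ",hard" is printed explicitly only when both flags are clear so /proc/mounts stays accurate. A minimal sketch of the resulting error selection, mirroring the pg_maxretrans hunk in pagelist.c above; the helper name is hypothetical:

    static int nfs_timeout_errno(unsigned int flags)
    {
            if (flags & NFS_MOUNT_SOFTERR)
                    return -ETIMEDOUT;      /* distinguishable timeout */
            if (flags & NFS_MOUNT_SOFT)
                    return -EIO;            /* legacy soft behaviour */
            return 0;                       /* hard: never give up */
    }

The userns comparison added to nfs_compare_super() is a separate concern: two mounts of the same export from different user namespaces must not share a superblock, since their id translations differ.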
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 06eb44b47885..25ba299fdac2 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -26,8 +26,9 @@
26 * and straight-forward than readdir caching. 26 * and straight-forward than readdir caching.
27 */ 27 */
28 28
29static int nfs_symlink_filler(struct inode *inode, struct page *page) 29static int nfs_symlink_filler(void *data, struct page *page)
30{ 30{
31 struct inode *inode = data;
31 int error; 32 int error;
32 33
33 error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); 34 error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
@@ -65,8 +66,8 @@ static const char *nfs_get_link(struct dentry *dentry,
65 err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); 66 err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
66 if (err) 67 if (err)
67 return err; 68 return err;
68 page = read_cache_page(&inode->i_data, 0, 69 page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
69 (filler_t *)nfs_symlink_filler, inode); 70 inode);
70 if (IS_ERR(page)) 71 if (IS_ERR(page))
71 return ERR_CAST(page); 72 return ERR_CAST(page);
72 } 73 }
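
[Note] The old code cast nfs_symlink_filler() to filler_t, and calling a function through a mismatched pointer type is undefined behaviour (it also defeats control-flow-integrity checking). The fix gives the filler the exact signature read_cache_page() expects and recovers the typed argument inside. The pattern in sketch form, assuming the contemporary filler_t prototype and with error handling elided:

typedef int filler_t(void *, struct page *);	/* from linux/pagemap.h */

static int example_filler(void *data, struct page *page)
{
	struct inode *inode = data;	/* recover the typed argument */

	return NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
}
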
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f3ebabaa291d..bc5bb9323412 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -244,6 +244,12 @@ static void nfs_set_pageerror(struct address_space *mapping)
244 nfs_zap_mapping(mapping->host, mapping); 244 nfs_zap_mapping(mapping->host, mapping);
245} 245}
246 246
247static void nfs_mapping_set_error(struct page *page, int error)
248{
249 SetPageError(page);
250 mapping_set_error(page_file_mapping(page), error);
251}
252
247/* 253/*
248 * nfs_page_group_search_locked 254 * nfs_page_group_search_locked
249 * @head - head request of page group 255 * @head - head request of page group
@@ -582,11 +588,10 @@ release_request:
582 return ERR_PTR(ret); 588 return ERR_PTR(ret);
583} 589}
584 590
585static void nfs_write_error_remove_page(struct nfs_page *req) 591static void nfs_write_error(struct nfs_page *req, int error)
586{ 592{
593 nfs_mapping_set_error(req->wb_page, error);
587 nfs_end_page_writeback(req); 594 nfs_end_page_writeback(req);
588 generic_error_remove_page(page_file_mapping(req->wb_page),
589 req->wb_page);
590 nfs_release_request(req); 595 nfs_release_request(req);
591} 596}
592 597
@@ -609,6 +614,7 @@ nfs_error_is_fatal_on_server(int err)
609static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 614static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
610 struct page *page) 615 struct page *page)
611{ 616{
617 struct address_space *mapping;
612 struct nfs_page *req; 618 struct nfs_page *req;
613 int ret = 0; 619 int ret = 0;
614 620
@@ -622,19 +628,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
622 nfs_set_page_writeback(page); 628 nfs_set_page_writeback(page);
623 WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); 629 WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
624 630
625 ret = req->wb_context->error;
626 /* If there is a fatal error that covers this write, just exit */ 631 /* If there is a fatal error that covers this write, just exit */
627 if (nfs_error_is_fatal_on_server(ret)) 632 ret = 0;
633 mapping = page_file_mapping(page);
634 if (test_bit(AS_ENOSPC, &mapping->flags) ||
635 test_bit(AS_EIO, &mapping->flags))
628 goto out_launder; 636 goto out_launder;
629 637
630 ret = 0;
631 if (!nfs_pageio_add_request(pgio, req)) { 638 if (!nfs_pageio_add_request(pgio, req)) {
632 ret = pgio->pg_error; 639 ret = pgio->pg_error;
633 /* 640 /*
634 * Remove the problematic req upon fatal errors on the server 641 * Remove the problematic req upon fatal errors on the server
635 */ 642 */
636 if (nfs_error_is_fatal(ret)) { 643 if (nfs_error_is_fatal(ret)) {
637 nfs_context_set_write_error(req->wb_context, ret);
638 if (nfs_error_is_fatal_on_server(ret)) 644 if (nfs_error_is_fatal_on_server(ret))
639 goto out_launder; 645 goto out_launder;
640 } else 646 } else
@@ -646,8 +652,8 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
646out: 652out:
647 return ret; 653 return ret;
648out_launder: 654out_launder:
649 nfs_write_error_remove_page(req); 655 nfs_write_error(req, ret);
650 return ret; 656 return 0;
651} 657}
652 658
653static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, 659static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
@@ -958,7 +964,8 @@ static void
958nfs_clear_request_commit(struct nfs_page *req) 964nfs_clear_request_commit(struct nfs_page *req)
959{ 965{
960 if (test_bit(PG_CLEAN, &req->wb_flags)) { 966 if (test_bit(PG_CLEAN, &req->wb_flags)) {
961 struct inode *inode = d_inode(req->wb_context->dentry); 967 struct nfs_open_context *ctx = nfs_req_openctx(req);
968 struct inode *inode = d_inode(ctx->dentry);
962 struct nfs_commit_info cinfo; 969 struct nfs_commit_info cinfo;
963 970
964 nfs_init_cinfo_from_inode(&cinfo, inode); 971 nfs_init_cinfo_from_inode(&cinfo, inode);
@@ -999,10 +1006,12 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
999 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && 1006 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
1000 (hdr->good_bytes < bytes)) { 1007 (hdr->good_bytes < bytes)) {
1001 nfs_set_pageerror(page_file_mapping(req->wb_page)); 1008 nfs_set_pageerror(page_file_mapping(req->wb_page));
1002 nfs_context_set_write_error(req->wb_context, hdr->error); 1009 nfs_mapping_set_error(req->wb_page, hdr->error);
1003 goto remove_req; 1010 goto remove_req;
1004 } 1011 }
1005 if (nfs_write_need_commit(hdr)) { 1012 if (nfs_write_need_commit(hdr)) {
1013 /* Reset wb_nio, since the write was successful. */
1014 req->wb_nio = 0;
1006 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); 1015 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
1007 nfs_mark_request_commit(req, hdr->lseg, &cinfo, 1016 nfs_mark_request_commit(req, hdr->lseg, &cinfo,
1008 hdr->pgio_mirror_idx); 1017 hdr->pgio_mirror_idx);
@@ -1136,6 +1145,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
1136 req->wb_bytes = end - req->wb_offset; 1145 req->wb_bytes = end - req->wb_offset;
1137 else 1146 else
1138 req->wb_bytes = rqend - req->wb_offset; 1147 req->wb_bytes = rqend - req->wb_offset;
1148 req->wb_nio = 0;
1139 return req; 1149 return req;
1140out_flushme: 1150out_flushme:
1141 /* 1151 /*
@@ -1165,7 +1175,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
1165 req = nfs_try_to_update_request(inode, page, offset, bytes); 1175 req = nfs_try_to_update_request(inode, page, offset, bytes);
1166 if (req != NULL) 1176 if (req != NULL)
1167 goto out; 1177 goto out;
1168 req = nfs_create_request(ctx, page, NULL, offset, bytes); 1178 req = nfs_create_request(ctx, page, offset, bytes);
1169 if (IS_ERR(req)) 1179 if (IS_ERR(req))
1170 goto out; 1180 goto out;
1171 nfs_inode_add_request(inode, req); 1181 nfs_inode_add_request(inode, req);
@@ -1210,7 +1220,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1210 return 0; 1220 return 0;
1211 l_ctx = req->wb_lock_context; 1221 l_ctx = req->wb_lock_context;
1212 do_flush = req->wb_page != page || 1222 do_flush = req->wb_page != page ||
1213 !nfs_match_open_context(req->wb_context, ctx); 1223 !nfs_match_open_context(nfs_req_openctx(req), ctx);
1214 if (l_ctx && flctx && 1224 if (l_ctx && flctx &&
1215 !(list_empty_careful(&flctx->flc_posix) && 1225 !(list_empty_careful(&flctx->flc_posix) &&
1216 list_empty_careful(&flctx->flc_flock))) { 1226 list_empty_careful(&flctx->flc_flock))) {
@@ -1410,8 +1420,10 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
1410 */ 1420 */
1411static void nfs_redirty_request(struct nfs_page *req) 1421static void nfs_redirty_request(struct nfs_page *req)
1412{ 1422{
1423 /* Bump the transmission count */
1424 req->wb_nio++;
1413 nfs_mark_request_dirty(req); 1425 nfs_mark_request_dirty(req);
1414 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); 1426 set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
1415 nfs_end_page_writeback(req); 1427 nfs_end_page_writeback(req);
1416 nfs_release_request(req); 1428 nfs_release_request(req);
1417} 1429}
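
[Note] nfs_redirty_request() now bumps wb_nio on every retransmission, and the counter is reset when a write succeeds or when a request is coalesced with new data (see the hunks above). Together with the new pg_maxretrans descriptor field this lets the pageio layer bound how often a request is requeued. A hypothetical sketch of such a cap; the placement and helper name are illustrative, the real check in this series lives in the pagelist code, which is not shown here:

/* Hypothetical helper, illustrative only. */
static bool nfs_req_exhausted(const struct nfs_pageio_descriptor *desc,
			      const struct nfs_page *req)
{
	return desc->pg_maxretrans != 0 && req->wb_nio > desc->pg_maxretrans;
}

On exhaustion the descriptor can fail the request with -ETIMEDOUT instead of redirtying it, which is what gives "softerr" writes a bounded lifetime.
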
@@ -1423,14 +1435,10 @@ static void nfs_async_write_error(struct list_head *head, int error)
1423 while (!list_empty(head)) { 1435 while (!list_empty(head)) {
1424 req = nfs_list_entry(head->next); 1436 req = nfs_list_entry(head->next);
1425 nfs_list_remove_request(req); 1437 nfs_list_remove_request(req);
1426 if (nfs_error_is_fatal(error)) { 1438 if (nfs_error_is_fatal(error))
1427 nfs_context_set_write_error(req->wb_context, error); 1439 nfs_write_error(req, error);
1428 if (nfs_error_is_fatal_on_server(error)) { 1440 else
1429 nfs_write_error_remove_page(req); 1441 nfs_redirty_request(req);
1430 continue;
1431 }
1432 }
1433 nfs_redirty_request(req);
1434 } 1442 }
1435} 1443}
1436 1444
@@ -1735,7 +1743,8 @@ void nfs_init_commit(struct nfs_commit_data *data,
1735 struct nfs_commit_info *cinfo) 1743 struct nfs_commit_info *cinfo)
1736{ 1744{
1737 struct nfs_page *first = nfs_list_entry(head->next); 1745 struct nfs_page *first = nfs_list_entry(head->next);
1738 struct inode *inode = d_inode(first->wb_context->dentry); 1746 struct nfs_open_context *ctx = nfs_req_openctx(first);
1747 struct inode *inode = d_inode(ctx->dentry);
1739 1748
1740 /* Set up the RPC argument and reply structs 1749 /* Set up the RPC argument and reply structs
1741 * NB: take care not to mess about with data->commit et al. */ 1750 * NB: take care not to mess about with data->commit et al. */
@@ -1743,7 +1752,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
1743 list_splice_init(head, &data->pages); 1752 list_splice_init(head, &data->pages);
1744 1753
1745 data->inode = inode; 1754 data->inode = inode;
1746 data->cred = first->wb_context->cred; 1755 data->cred = ctx->cred;
1747 data->lseg = lseg; /* reference transferred */ 1756 data->lseg = lseg; /* reference transferred */
1748 /* only set lwb for pnfs commit */ 1757 /* only set lwb for pnfs commit */
1749 if (lseg) 1758 if (lseg)
@@ -1756,7 +1765,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
1756 /* Note: we always request a commit of the entire inode */ 1765 /* Note: we always request a commit of the entire inode */
1757 data->args.offset = 0; 1766 data->args.offset = 0;
1758 data->args.count = 0; 1767 data->args.count = 0;
1759 data->context = get_nfs_open_context(first->wb_context); 1768 data->context = get_nfs_open_context(ctx);
1760 data->res.fattr = &data->fattr; 1769 data->res.fattr = &data->fattr;
1761 data->res.verf = &data->verf; 1770 data->res.verf = &data->verf;
1762 nfs_fattr_init(&data->fattr); 1771 nfs_fattr_init(&data->fattr);
@@ -1839,14 +1848,15 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
1839 nfs_clear_page_commit(req->wb_page); 1848 nfs_clear_page_commit(req->wb_page);
1840 1849
1841 dprintk("NFS: commit (%s/%llu %d@%lld)", 1850 dprintk("NFS: commit (%s/%llu %d@%lld)",
1842 req->wb_context->dentry->d_sb->s_id, 1851 nfs_req_openctx(req)->dentry->d_sb->s_id,
1843 (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)), 1852 (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
1844 req->wb_bytes, 1853 req->wb_bytes,
1845 (long long)req_offset(req)); 1854 (long long)req_offset(req));
1846 if (status < 0) { 1855 if (status < 0) {
1847 nfs_context_set_write_error(req->wb_context, status); 1856 if (req->wb_page) {
1848 if (req->wb_page) 1857 nfs_mapping_set_error(req->wb_page, status);
1849 nfs_inode_remove_request(req); 1858 nfs_inode_remove_request(req);
1859 }
1850 dprintk_cont(", error = %d\n", status); 1860 dprintk_cont(", error = %d\n", status);
1851 goto next; 1861 goto next;
1852 } 1862 }
@@ -1863,7 +1873,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
1863 /* We have a mismatch. Write the page again */ 1873 /* We have a mismatch. Write the page again */
1864 dprintk_cont(" mismatch\n"); 1874 dprintk_cont(" mismatch\n");
1865 nfs_mark_request_dirty(req); 1875 nfs_mark_request_dirty(req);
1866 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); 1876 set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
1867 next: 1877 next:
1868 nfs_unlock_and_release_request(req); 1878 nfs_unlock_and_release_request(req);
1869 /* Latency breaker */ 1879 /* Latency breaker */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7caa3801ce72..9b93e7a9a26d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -868,6 +868,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
868 .program = &cb_program, 868 .program = &cb_program,
869 .version = 1, 869 .version = 1,
870 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 870 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
871 .cred = current_cred(),
871 }; 872 };
872 struct rpc_clnt *client; 873 struct rpc_clnt *client;
873 const struct cred *cred; 874 const struct cred *cred;
@@ -1033,7 +1034,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
1033 * the submission code will error out, so we don't need to 1034 * the submission code will error out, so we don't need to
1034 * handle that case here. 1035 * handle that case here.
1035 */ 1036 */
1036 if (task->tk_flags & RPC_TASK_KILLED) 1037 if (RPC_SIGNALLED(task))
1037 goto need_restart; 1038 goto need_restart;
1038 1039
1039 return true; 1040 return true;
@@ -1086,7 +1087,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
1086 dprintk("%s: freed slot, new seqid=%d\n", __func__, 1087 dprintk("%s: freed slot, new seqid=%d\n", __func__,
1087 clp->cl_cb_session->se_cb_seq_nr); 1088 clp->cl_cb_session->se_cb_seq_nr);
1088 1089
1089 if (task->tk_flags & RPC_TASK_KILLED) 1090 if (RPC_SIGNALLED(task))
1090 goto need_restart; 1091 goto need_restart;
1091out: 1092out:
1092 return ret; 1093 return ret;
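
[Note] Besides the RPC_SIGNALLED() conversions, note the new `.cred = current_cred()` in the callback client's create args: rpc_new_client() now pins the supplied cred and rpc_free_client() drops it (see the get_cred()/put_cred() calls in the net/sunrpc/clnt.c hunks below), so callers may pass a borrowed reference. Sketch:

struct rpc_create_args args = {
	.program = &cb_program,
	.cred	 = current_cred(),	/* borrowed; core takes its own ref */
};
struct rpc_clnt *client = rpc_create(&args);	/* may be an ERR_PTR */
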
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 053a4ef3d431..8c0cf1059443 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -46,6 +46,7 @@ struct nlmclnt_initdata {
46 int noresvport; 46 int noresvport;
47 struct net *net; 47 struct net *net;
48 const struct nlmclnt_operations *nlmclnt_ops; 48 const struct nlmclnt_operations *nlmclnt_ops;
49 const struct cred *cred;
49}; 50};
50 51
51/* 52/*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b065ef406770..c9b422dde542 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -70,6 +70,7 @@ struct nlm_host {
70 struct nsm_handle *h_nsmhandle; /* NSM status handle */ 70 struct nsm_handle *h_nsmhandle; /* NSM status handle */
71 char *h_addrbuf; /* address eyecatcher */ 71 char *h_addrbuf; /* address eyecatcher */
72 struct net *net; /* host net */ 72 struct net *net; /* host net */
73 const struct cred *h_cred;
73 char nodename[UNX_MAXNODENAME + 1]; 74 char nodename[UNX_MAXNODENAME + 1];
74 const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ 75 const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */
75}; 76};
@@ -229,7 +230,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
229 const u32 version, 230 const u32 version,
230 const char *hostname, 231 const char *hostname,
231 int noresvport, 232 int noresvport,
232 struct net *net); 233 struct net *net,
234 const struct cred *cred);
233void nlmclnt_release_host(struct nlm_host *); 235void nlmclnt_release_host(struct nlm_host *);
234struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, 236struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
235 const char *hostname, 237 const char *hostname,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 40e30376130b..d363d5765cdf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -76,7 +76,6 @@ struct nfs_open_context {
76 fmode_t mode; 76 fmode_t mode;
77 77
78 unsigned long flags; 78 unsigned long flags;
79#define NFS_CONTEXT_ERROR_WRITE (0)
80#define NFS_CONTEXT_RESEND_WRITES (1) 79#define NFS_CONTEXT_RESEND_WRITES (1)
81#define NFS_CONTEXT_BAD (2) 80#define NFS_CONTEXT_BAD (2)
82#define NFS_CONTEXT_UNLOCK (3) 81#define NFS_CONTEXT_UNLOCK (3)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index c827d31298cc..1e78032a174b 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -139,6 +139,16 @@ struct nfs_server {
139 struct nfs_iostats __percpu *io_stats; /* I/O statistics */ 139 struct nfs_iostats __percpu *io_stats; /* I/O statistics */
140 atomic_long_t writeback; /* number of writeback pages */ 140 atomic_long_t writeback; /* number of writeback pages */
141 int flags; /* various flags */ 141 int flags; /* various flags */
142
143/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
144#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
145#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
146#define NFS_MOUNT_NORESVPORT 0x40000
147#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
148#define NFS_MOUNT_LOCAL_FLOCK 0x100000
149#define NFS_MOUNT_LOCAL_FCNTL 0x200000
150#define NFS_MOUNT_SOFTERR 0x400000
151
142 unsigned int caps; /* server capabilities */ 152 unsigned int caps; /* server capabilities */
143 unsigned int rsize; /* read size */ 153 unsigned int rsize; /* read size */
144 unsigned int rpages; /* read size (in pages) */ 154 unsigned int rpages; /* read size (in pages) */
@@ -231,6 +241,9 @@ struct nfs_server {
231 241
232 /* XDR related information */ 242 /* XDR related information */
233 unsigned int read_hdrsize; 243 unsigned int read_hdrsize;
244
245 /* User namespace info */
246 const struct cred *cred;
234}; 247};
235 248
236/* Server capabilities */ 249/* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index ad69430fd0eb..0bbd587fac6a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -42,7 +42,6 @@ struct nfs_inode;
42struct nfs_page { 42struct nfs_page {
43 struct list_head wb_list; /* Defines state of page: */ 43 struct list_head wb_list; /* Defines state of page: */
44 struct page *wb_page; /* page to read in/write out */ 44 struct page *wb_page; /* page to read in/write out */
45 struct nfs_open_context *wb_context; /* File state context info */
46 struct nfs_lock_context *wb_lock_context; /* lock context info */ 45 struct nfs_lock_context *wb_lock_context; /* lock context info */
47 pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ 46 pgoff_t wb_index; /* Offset >> PAGE_SHIFT */
48 unsigned int wb_offset, /* Offset & ~PAGE_MASK */ 47 unsigned int wb_offset, /* Offset & ~PAGE_MASK */
@@ -53,6 +52,7 @@ struct nfs_page {
53 struct nfs_write_verifier wb_verf; /* Commit cookie */ 52 struct nfs_write_verifier wb_verf; /* Commit cookie */
54 struct nfs_page *wb_this_page; /* list of reqs for this page */ 53 struct nfs_page *wb_this_page; /* list of reqs for this page */
55 struct nfs_page *wb_head; /* head pointer for req list */ 54 struct nfs_page *wb_head; /* head pointer for req list */
55 unsigned short wb_nio; /* Number of I/O attempts */
56}; 56};
57 57
58struct nfs_pageio_descriptor; 58struct nfs_pageio_descriptor;
@@ -87,7 +87,6 @@ struct nfs_pgio_mirror {
87}; 87};
88 88
89struct nfs_pageio_descriptor { 89struct nfs_pageio_descriptor {
90 unsigned char pg_moreio : 1;
91 struct inode *pg_inode; 90 struct inode *pg_inode;
92 const struct nfs_pageio_ops *pg_ops; 91 const struct nfs_pageio_ops *pg_ops;
93 const struct nfs_rw_ops *pg_rw_ops; 92 const struct nfs_rw_ops *pg_rw_ops;
@@ -105,6 +104,8 @@ struct nfs_pageio_descriptor {
105 struct nfs_pgio_mirror pg_mirrors_static[1]; 104 struct nfs_pgio_mirror pg_mirrors_static[1];
106 struct nfs_pgio_mirror *pg_mirrors_dynamic; 105 struct nfs_pgio_mirror *pg_mirrors_dynamic;
107 u32 pg_mirror_idx; /* current mirror */ 106 u32 pg_mirror_idx; /* current mirror */
107 unsigned short pg_maxretrans;
108 unsigned char pg_moreio : 1;
108}; 109};
109 110
110/* arbitrarily selected limit to number of mirrors */ 111/* arbitrarily selected limit to number of mirrors */
@@ -114,7 +115,6 @@ struct nfs_pageio_descriptor {
114 115
115extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, 116extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
116 struct page *page, 117 struct page *page,
117 struct nfs_page *last,
118 unsigned int offset, 118 unsigned int offset,
119 unsigned int count); 119 unsigned int count);
120extern void nfs_release_request(struct nfs_page *); 120extern void nfs_release_request(struct nfs_page *);
@@ -199,4 +199,10 @@ loff_t req_offset(struct nfs_page *req)
199 return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset; 199 return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
200} 200}
201 201
202static inline struct nfs_open_context *
203nfs_req_openctx(struct nfs_page *req)
204{
205 return req->wb_lock_context->open_context;
206}
207
202#endif /* _LINUX_NFS_PAGE_H */ 208#endif /* _LINUX_NFS_PAGE_H */
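
[Note] With wb_context gone from struct nfs_page, the open context is reached through the lock context, which already holds a reference to it; nfs_req_openctx() hides the extra hop. Conversion sketch for code that used the removed field, mirroring the fs/nfs/write.c hunks above:

/* before:	ctx = req->wb_context;
 * after:	ctx = nfs_req_openctx(req);
 * e.g. resolving the inode behind a request: */
struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
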
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 98bc9883b230..6e8073140a5d 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -50,6 +50,7 @@ struct rpc_clnt {
50 struct rpc_iostats * cl_metrics; /* per-client statistics */ 50 struct rpc_iostats * cl_metrics; /* per-client statistics */
51 51
52 unsigned int cl_softrtry : 1,/* soft timeouts */ 52 unsigned int cl_softrtry : 1,/* soft timeouts */
53 cl_softerr : 1,/* Timeouts return errors */
53 cl_discrtry : 1,/* disconnect before retry */ 54 cl_discrtry : 1,/* disconnect before retry */
54 cl_noretranstimeo: 1,/* No retransmit timeouts */ 55 cl_noretranstimeo: 1,/* No retransmit timeouts */
55 cl_autobind : 1,/* use getport() */ 56 cl_autobind : 1,/* use getport() */
@@ -71,6 +72,7 @@ struct rpc_clnt {
71 struct dentry *cl_debugfs; /* debugfs directory */ 72 struct dentry *cl_debugfs; /* debugfs directory */
72#endif 73#endif
73 struct rpc_xprt_iter cl_xpi; 74 struct rpc_xprt_iter cl_xpi;
75 const struct cred *cl_cred;
74}; 76};
75 77
76/* 78/*
@@ -125,6 +127,7 @@ struct rpc_create_args {
125 unsigned long flags; 127 unsigned long flags;
126 char *client_name; 128 char *client_name;
127 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 129 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
130 const struct cred *cred;
128}; 131};
129 132
130struct rpc_add_xprt_test { 133struct rpc_add_xprt_test {
@@ -144,6 +147,7 @@ struct rpc_add_xprt_test {
144#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) 147#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
145#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) 148#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8)
146#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) 149#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
150#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
147 151
148struct rpc_clnt *rpc_create(struct rpc_create_args *args); 152struct rpc_clnt *rpc_create(struct rpc_create_args *args);
149struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, 153struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 52d41d0c1ae1..d0e451868f02 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -35,7 +35,6 @@ struct rpc_wait {
35 struct list_head list; /* wait queue links */ 35 struct list_head list; /* wait queue links */
36 struct list_head links; /* Links to related tasks */ 36 struct list_head links; /* Links to related tasks */
37 struct list_head timer_list; /* Timer list */ 37 struct list_head timer_list; /* Timer list */
38 unsigned long expires;
39}; 38};
40 39
41/* 40/*
@@ -62,6 +61,8 @@ struct rpc_task {
62 struct rpc_wait tk_wait; /* RPC wait */ 61 struct rpc_wait tk_wait; /* RPC wait */
63 } u; 62 } u;
64 63
64 int tk_rpc_status; /* Result of last RPC operation */
65
65 /* 66 /*
66 * RPC call state 67 * RPC call state
67 */ 68 */
@@ -125,7 +126,6 @@ struct rpc_task_setup {
125#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ 126#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */
126#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ 127#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */
127#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ 128#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
128#define RPC_TASK_KILLED 0x0100 /* task was killed */
129#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ 129#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */
130#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ 130#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */
131#define RPC_TASK_SENT 0x0800 /* message was sent */ 131#define RPC_TASK_SENT 0x0800 /* message was sent */
@@ -135,7 +135,6 @@ struct rpc_task_setup {
135 135
136#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) 136#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC)
137#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) 137#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER)
138#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED)
139#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) 138#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
140#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) 139#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN)
141#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) 140#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT)
@@ -146,6 +145,7 @@ struct rpc_task_setup {
146#define RPC_TASK_NEED_XMIT 3 145#define RPC_TASK_NEED_XMIT 3
147#define RPC_TASK_NEED_RECV 4 146#define RPC_TASK_NEED_RECV 4
148#define RPC_TASK_MSG_PIN_WAIT 5 147#define RPC_TASK_MSG_PIN_WAIT 5
148#define RPC_TASK_SIGNALLED 6
149 149
150#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) 150#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
151#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) 151#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
@@ -169,6 +169,8 @@ struct rpc_task_setup {
169 169
170#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) 170#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
171 171
172#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
173
172/* 174/*
173 * Task priorities. 175 * Task priorities.
174 * Note: if you change these, you must also change 176 * Note: if you change these, you must also change
@@ -183,7 +185,6 @@ struct rpc_task_setup {
183struct rpc_timer { 185struct rpc_timer {
184 struct timer_list timer; 186 struct timer_list timer;
185 struct list_head list; 187 struct list_head list;
186 unsigned long expires;
187}; 188};
188 189
189/* 190/*
@@ -217,6 +218,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
217struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); 218struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
218void rpc_put_task(struct rpc_task *); 219void rpc_put_task(struct rpc_task *);
219void rpc_put_task_async(struct rpc_task *); 220void rpc_put_task_async(struct rpc_task *);
221void rpc_signal_task(struct rpc_task *);
220void rpc_exit_task(struct rpc_task *); 222void rpc_exit_task(struct rpc_task *);
221void rpc_exit(struct rpc_task *, int); 223void rpc_exit(struct rpc_task *, int);
222void rpc_release_calldata(const struct rpc_call_ops *, void *); 224void rpc_release_calldata(const struct rpc_call_ops *, void *);
@@ -225,11 +227,19 @@ void rpc_execute(struct rpc_task *);
225void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); 227void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
226void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); 228void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
227void rpc_destroy_wait_queue(struct rpc_wait_queue *); 229void rpc_destroy_wait_queue(struct rpc_wait_queue *);
230unsigned long rpc_task_timeout(const struct rpc_task *task);
231void rpc_sleep_on_timeout(struct rpc_wait_queue *queue,
232 struct rpc_task *task,
233 rpc_action action,
234 unsigned long timeout);
228void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, 235void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
229 rpc_action action); 236 rpc_action action);
237void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
238 struct rpc_task *task,
239 unsigned long timeout,
240 int priority);
230void rpc_sleep_on_priority(struct rpc_wait_queue *, 241void rpc_sleep_on_priority(struct rpc_wait_queue *,
231 struct rpc_task *, 242 struct rpc_task *,
232 rpc_action action,
233 int priority); 243 int priority);
234void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, 244void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
235 struct rpc_wait_queue *queue, 245 struct rpc_wait_queue *queue,
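
[Note] The queue timeout API changes shape here: instead of priming task->tk_timeout before sleeping, callers pass an absolute deadline in jiffies to rpc_sleep_on_timeout() or rpc_sleep_on_priority_timeout(), and rpc_task_timeout() reports the time remaining. The auth_gss hunk below shows a real conversion; in sketch form:

/* before:
 *	task->tk_timeout = 15 * HZ;
 *	rpc_sleep_on(&queue, task, NULL);
 * after: */
rpc_sleep_on_timeout(&queue, task, NULL, jiffies + 15 * HZ);
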
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3a391544299e..a6d9fce7f20e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -143,7 +143,7 @@ struct rpc_xprt_ops {
143 void (*buf_free)(struct rpc_task *task); 143 void (*buf_free)(struct rpc_task *task);
144 void (*prepare_request)(struct rpc_rqst *req); 144 void (*prepare_request)(struct rpc_rqst *req);
145 int (*send_request)(struct rpc_rqst *req); 145 int (*send_request)(struct rpc_rqst *req);
146 void (*set_retrans_timeout)(struct rpc_task *task); 146 void (*wait_for_reply_request)(struct rpc_task *task);
147 void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); 147 void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
148 void (*release_request)(struct rpc_task *task); 148 void (*release_request)(struct rpc_task *task);
149 void (*close)(struct rpc_xprt *xprt); 149 void (*close)(struct rpc_xprt *xprt);
@@ -378,8 +378,8 @@ xprt_disable_swap(struct rpc_xprt *xprt)
378int xprt_register_transport(struct xprt_class *type); 378int xprt_register_transport(struct xprt_class *type);
379int xprt_unregister_transport(struct xprt_class *type); 379int xprt_unregister_transport(struct xprt_class *type);
380int xprt_load_transport(const char *); 380int xprt_load_transport(const char *);
381void xprt_set_retrans_timeout_def(struct rpc_task *task); 381void xprt_wait_for_reply_request_def(struct rpc_task *task);
382void xprt_set_retrans_timeout_rtt(struct rpc_task *task); 382void xprt_wait_for_reply_request_rtt(struct rpc_task *task);
383void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); 383void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
384void xprt_wait_for_buffer_space(struct rpc_xprt *xprt); 384void xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
385bool xprt_write_space(struct rpc_xprt *xprt); 385bool xprt_write_space(struct rpc_xprt *xprt);
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 962975b4313f..df9851cb82b2 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -511,6 +511,33 @@ TRACE_EVENT(xprtrdma_marshal,
511 ) 511 )
512); 512);
513 513
514TRACE_EVENT(xprtrdma_marshal_failed,
515 TP_PROTO(const struct rpc_rqst *rqst,
516 int ret
517 ),
518
519 TP_ARGS(rqst, ret),
520
521 TP_STRUCT__entry(
522 __field(unsigned int, task_id)
523 __field(unsigned int, client_id)
524 __field(u32, xid)
525 __field(int, ret)
526 ),
527
528 TP_fast_assign(
529 __entry->task_id = rqst->rq_task->tk_pid;
530 __entry->client_id = rqst->rq_task->tk_client->cl_clid;
531 __entry->xid = be32_to_cpu(rqst->rq_xid);
532 __entry->ret = ret;
533 ),
534
535 TP_printk("task:%u@%u xid=0x%08x: ret=%d",
536 __entry->task_id, __entry->client_id, __entry->xid,
537 __entry->ret
538 )
539);
540
514TRACE_EVENT(xprtrdma_post_send, 541TRACE_EVENT(xprtrdma_post_send,
515 TP_PROTO( 542 TP_PROTO(
516 const struct rpcrdma_req *req, 543 const struct rpcrdma_req *req,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index f0a6f0c5549c..ffa3c51dbb1a 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
82TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN); 82TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
83TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS); 83TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
84TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC); 84TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
85TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
86TRACE_DEFINE_ENUM(RPC_TASK_SOFT); 85TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
87TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN); 86TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
88TRACE_DEFINE_ENUM(RPC_TASK_SENT); 87TRACE_DEFINE_ENUM(RPC_TASK_SENT);
@@ -97,7 +96,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
97 { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ 96 { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
98 { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ 97 { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
99 { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ 98 { RPC_TASK_DYNAMIC, "DYNAMIC" }, \
100 { RPC_TASK_KILLED, "KILLED" }, \
101 { RPC_TASK_SOFT, "SOFT" }, \ 99 { RPC_TASK_SOFT, "SOFT" }, \
102 { RPC_TASK_SOFTCONN, "SOFTCONN" }, \ 100 { RPC_TASK_SOFTCONN, "SOFTCONN" }, \
103 { RPC_TASK_SENT, "SENT" }, \ 101 { RPC_TASK_SENT, "SENT" }, \
@@ -111,6 +109,7 @@ TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
111TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT); 109TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
112TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV); 110TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
113TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT); 111TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
112TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED);
114 113
115#define rpc_show_runstate(flags) \ 114#define rpc_show_runstate(flags) \
116 __print_flags(flags, "|", \ 115 __print_flags(flags, "|", \
@@ -119,7 +118,8 @@ TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
119 { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \ 118 { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
120 { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \ 119 { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
121 { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \ 120 { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
122 { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }) 121 { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \
122 { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
123 123
124DECLARE_EVENT_CLASS(rpc_task_running, 124DECLARE_EVENT_CLASS(rpc_task_running,
125 125
@@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
186 __entry->client_id = task->tk_client ? 186 __entry->client_id = task->tk_client ?
187 task->tk_client->cl_clid : -1; 187 task->tk_client->cl_clid : -1;
188 __entry->task_id = task->tk_pid; 188 __entry->task_id = task->tk_pid;
189 __entry->timeout = task->tk_timeout; 189 __entry->timeout = rpc_task_timeout(task);
190 __entry->runstate = task->tk_runstate; 190 __entry->runstate = task->tk_runstate;
191 __entry->status = task->tk_status; 191 __entry->status = task->tk_status;
192 __entry->flags = task->tk_flags; 192 __entry->flags = task->tk_flags;
diff --git a/include/uapi/linux/nfs_mount.h b/include/uapi/linux/nfs_mount.h
index e44e00616ab5..e3bcfc6aa3b0 100644
--- a/include/uapi/linux/nfs_mount.h
+++ b/include/uapi/linux/nfs_mount.h
@@ -66,13 +66,4 @@ struct nfs_mount_data {
66#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ 66#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
67#define NFS_MOUNT_FLAGMASK 0xFFFF 67#define NFS_MOUNT_FLAGMASK 0xFFFF
68 68
69/* The following are for internal use only */
70#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
71#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
72#define NFS_MOUNT_NORESVPORT 0x40000
73#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
74
75#define NFS_MOUNT_LOCAL_FLOCK 0x100000
76#define NFS_MOUNT_LOCAL_FCNTL 0x200000
77
78#endif 69#endif
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3fd56c0c90ae..4ce42c62458e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -269,6 +269,7 @@ err:
269struct gss_upcall_msg { 269struct gss_upcall_msg {
270 refcount_t count; 270 refcount_t count;
271 kuid_t uid; 271 kuid_t uid;
272 const char *service_name;
272 struct rpc_pipe_msg msg; 273 struct rpc_pipe_msg msg;
273 struct list_head list; 274 struct list_head list;
274 struct gss_auth *auth; 275 struct gss_auth *auth;
@@ -316,6 +317,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
316 gss_put_ctx(gss_msg->ctx); 317 gss_put_ctx(gss_msg->ctx);
317 rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); 318 rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
318 gss_put_auth(gss_msg->auth); 319 gss_put_auth(gss_msg->auth);
320 kfree_const(gss_msg->service_name);
319 kfree(gss_msg); 321 kfree(gss_msg);
320} 322}
321 323
@@ -410,9 +412,12 @@ gss_upcall_callback(struct rpc_task *task)
410 gss_release_msg(gss_msg); 412 gss_release_msg(gss_msg);
411} 413}
412 414
413static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) 415static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg,
416 const struct cred *cred)
414{ 417{
415 uid_t uid = from_kuid(&init_user_ns, gss_msg->uid); 418 struct user_namespace *userns = cred->user_ns;
419
420 uid_t uid = from_kuid_munged(userns, gss_msg->uid);
416 memcpy(gss_msg->databuf, &uid, sizeof(uid)); 421 memcpy(gss_msg->databuf, &uid, sizeof(uid));
417 gss_msg->msg.data = gss_msg->databuf; 422 gss_msg->msg.data = gss_msg->databuf;
418 gss_msg->msg.len = sizeof(uid); 423 gss_msg->msg.len = sizeof(uid);
@@ -420,17 +425,31 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
420 BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf)); 425 BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf));
421} 426}
422 427
428static ssize_t
429gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg,
430 char __user *buf, size_t buflen)
431{
432 struct gss_upcall_msg *gss_msg = container_of(msg,
433 struct gss_upcall_msg,
434 msg);
435 if (msg->copied == 0)
436 gss_encode_v0_msg(gss_msg, file->f_cred);
437 return rpc_pipe_generic_upcall(file, msg, buf, buflen);
438}
439
423static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, 440static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
424 const char *service_name, 441 const char *service_name,
425 const char *target_name) 442 const char *target_name,
443 const struct cred *cred)
426{ 444{
445 struct user_namespace *userns = cred->user_ns;
427 struct gss_api_mech *mech = gss_msg->auth->mech; 446 struct gss_api_mech *mech = gss_msg->auth->mech;
428 char *p = gss_msg->databuf; 447 char *p = gss_msg->databuf;
429 size_t buflen = sizeof(gss_msg->databuf); 448 size_t buflen = sizeof(gss_msg->databuf);
430 int len; 449 int len;
431 450
432 len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name, 451 len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
433 from_kuid(&init_user_ns, gss_msg->uid)); 452 from_kuid_munged(userns, gss_msg->uid));
434 buflen -= len; 453 buflen -= len;
435 p += len; 454 p += len;
436 gss_msg->msg.len = len; 455 gss_msg->msg.len = len;
@@ -491,6 +510,25 @@ out_overflow:
491 return -ENOMEM; 510 return -ENOMEM;
492} 511}
493 512
513static ssize_t
514gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg,
515 char __user *buf, size_t buflen)
516{
517 struct gss_upcall_msg *gss_msg = container_of(msg,
518 struct gss_upcall_msg,
519 msg);
520 int err;
521 if (msg->copied == 0) {
522 err = gss_encode_v1_msg(gss_msg,
523 gss_msg->service_name,
524 gss_msg->auth->target_name,
525 file->f_cred);
526 if (err)
527 return err;
528 }
529 return rpc_pipe_generic_upcall(file, msg, buf, buflen);
530}
531
494static struct gss_upcall_msg * 532static struct gss_upcall_msg *
495gss_alloc_msg(struct gss_auth *gss_auth, 533gss_alloc_msg(struct gss_auth *gss_auth,
496 kuid_t uid, const char *service_name) 534 kuid_t uid, const char *service_name)
@@ -513,16 +551,14 @@ gss_alloc_msg(struct gss_auth *gss_auth,
513 refcount_set(&gss_msg->count, 1); 551 refcount_set(&gss_msg->count, 1);
514 gss_msg->uid = uid; 552 gss_msg->uid = uid;
515 gss_msg->auth = gss_auth; 553 gss_msg->auth = gss_auth;
516 switch (vers) { 554 kref_get(&gss_auth->kref);
517 case 0: 555 if (service_name) {
518 gss_encode_v0_msg(gss_msg); 556 gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
519 break; 557 if (!gss_msg->service_name) {
520 default: 558 err = -ENOMEM;
521 err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
522 if (err)
523 goto err_put_pipe_version; 559 goto err_put_pipe_version;
560 }
524 } 561 }
525 kref_get(&gss_auth->kref);
526 return gss_msg; 562 return gss_msg;
527err_put_pipe_version: 563err_put_pipe_version:
528 put_pipe_version(gss_auth->net); 564 put_pipe_version(gss_auth->net);
@@ -581,8 +617,8 @@ gss_refresh_upcall(struct rpc_task *task)
581 /* XXX: warning on the first, under the assumption we 617 /* XXX: warning on the first, under the assumption we
582 * shouldn't normally hit this case on a refresh. */ 618 * shouldn't normally hit this case on a refresh. */
583 warn_gssd(); 619 warn_gssd();
584 task->tk_timeout = 15*HZ; 620 rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue,
585 rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); 621 task, NULL, jiffies + (15 * HZ));
586 err = -EAGAIN; 622 err = -EAGAIN;
587 goto out; 623 goto out;
588 } 624 }
@@ -595,7 +631,6 @@ gss_refresh_upcall(struct rpc_task *task)
595 if (gss_cred->gc_upcall != NULL) 631 if (gss_cred->gc_upcall != NULL)
596 rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); 632 rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
597 else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { 633 else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
598 task->tk_timeout = 0;
599 gss_cred->gc_upcall = gss_msg; 634 gss_cred->gc_upcall = gss_msg;
600 /* gss_upcall_callback will release the reference to gss_upcall_msg */ 635 /* gss_upcall_callback will release the reference to gss_upcall_msg */
601 refcount_inc(&gss_msg->count); 636 refcount_inc(&gss_msg->count);
@@ -707,7 +742,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
707 goto err; 742 goto err;
708 } 743 }
709 744
710 uid = make_kuid(&init_user_ns, id); 745 uid = make_kuid(current_user_ns(), id);
711 if (!uid_valid(uid)) { 746 if (!uid_valid(uid)) {
712 err = -EINVAL; 747 err = -EINVAL;
713 goto err; 748 goto err;
@@ -2116,7 +2151,7 @@ static const struct rpc_credops gss_nullops = {
2116}; 2151};
2117 2152
2118static const struct rpc_pipe_ops gss_upcall_ops_v0 = { 2153static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
2119 .upcall = rpc_pipe_generic_upcall, 2154 .upcall = gss_v0_upcall,
2120 .downcall = gss_pipe_downcall, 2155 .downcall = gss_pipe_downcall,
2121 .destroy_msg = gss_pipe_destroy_msg, 2156 .destroy_msg = gss_pipe_destroy_msg,
2122 .open_pipe = gss_pipe_open_v0, 2157 .open_pipe = gss_pipe_open_v0,
@@ -2124,7 +2159,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
2124}; 2159};
2125 2160
2126static const struct rpc_pipe_ops gss_upcall_ops_v1 = { 2161static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
2127 .upcall = rpc_pipe_generic_upcall, 2162 .upcall = gss_v1_upcall,
2128 .downcall = gss_pipe_downcall, 2163 .downcall = gss_pipe_downcall,
2129 .destroy_msg = gss_pipe_destroy_msg, 2164 .destroy_msg = gss_pipe_destroy_msg,
2130 .open_pipe = gss_pipe_open_v1, 2165 .open_pipe = gss_pipe_open_v1,
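
[Note] The structural change in auth_gss is that upcall messages are no longer encoded when queued (gss_alloc_msg() just stashes a copy of the service name); encoding is deferred to the first read of the pipe, so from_kuid_munged() can use the reader's user namespace via file->f_cred, i.e. the namespace gssd actually runs in. The deferral pattern in sketch form, where encode() stands in for the version-specific encoders above:

static ssize_t example_upcall(struct file *file, struct rpc_pipe_msg *msg,
			      char __user *buf, size_t buflen)
{
	struct gss_upcall_msg *gss_msg =
		container_of(msg, struct gss_upcall_msg, msg);

	if (msg->copied == 0)			/* first read: encode now, */
		encode(gss_msg, file->f_cred);	/* in the reader's userns */
	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
}
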
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index d4018e5a24c5..e7df1f782b2e 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -107,6 +107,8 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
107 __be32 *p, *cred_len, *gidarr_len; 107 __be32 *p, *cred_len, *gidarr_len;
108 int i; 108 int i;
109 struct group_info *gi = cred->cr_cred->group_info; 109 struct group_info *gi = cred->cr_cred->group_info;
110 struct user_namespace *userns = clnt->cl_cred ?
111 clnt->cl_cred->user_ns : &init_user_ns;
110 112
111 /* Credential */ 113 /* Credential */
112 114
@@ -122,14 +124,13 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
122 p = xdr_reserve_space(xdr, 3 * sizeof(*p)); 124 p = xdr_reserve_space(xdr, 3 * sizeof(*p));
123 if (!p) 125 if (!p)
124 goto marshal_failed; 126 goto marshal_failed;
125 *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid)); 127 *p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
126 *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid)); 128 *p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid));
127 129
128 gidarr_len = p++; 130 gidarr_len = p++;
129 if (gi) 131 if (gi)
130 for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) 132 for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
131 *p++ = cpu_to_be32(from_kgid(&init_user_ns, 133 *p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i]));
132 gi->gid[i]));
133 *gidarr_len = cpu_to_be32(p - gidarr_len - 1); 134 *gidarr_len = cpu_to_be32(p - gidarr_len - 1);
134 *cred_len = cpu_to_be32((p - cred_len - 1) << 2); 135 *cred_len = cpu_to_be32((p - cred_len - 1) << 2);
135 p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2); 136 p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
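
[Note] Two things change in AUTH_UNIX marshalling: the namespace now comes from the client's cred (falling back to &init_user_ns), and the _munged conversions are used. The difference matters when an id has no mapping in the target namespace:

/* from_kuid(ns, uid)        -> (uid_t)-1 when uid is unmapped in ns
 * from_kuid_munged(ns, uid) -> the overflow id (typically 65534)
 * so the credential put on the wire is never the invalid -1: */
*p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
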
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ff11dc98d7f..c1f1afabd024 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
394 if (err) 394 if (err)
395 goto out_no_clid; 395 goto out_no_clid;
396 396
397 clnt->cl_cred = get_cred(args->cred);
397 clnt->cl_procinfo = version->procs; 398 clnt->cl_procinfo = version->procs;
398 clnt->cl_maxproc = version->nrprocs; 399 clnt->cl_maxproc = version->nrprocs;
399 clnt->cl_prog = args->prognumber ? : program->number; 400 clnt->cl_prog = args->prognumber ? : program->number;
@@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
439out_no_path: 440out_no_path:
440 rpc_free_iostats(clnt->cl_metrics); 441 rpc_free_iostats(clnt->cl_metrics);
441out_no_stats: 442out_no_stats:
443 put_cred(clnt->cl_cred);
442 rpc_free_clid(clnt); 444 rpc_free_clid(clnt);
443out_no_clid: 445out_no_clid:
444 kfree(clnt); 446 kfree(clnt);
@@ -484,8 +486,11 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
484 } 486 }
485 487
486 clnt->cl_softrtry = 1; 488 clnt->cl_softrtry = 1;
487 if (args->flags & RPC_CLNT_CREATE_HARDRTRY) 489 if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) {
488 clnt->cl_softrtry = 0; 490 clnt->cl_softrtry = 0;
491 if (args->flags & RPC_CLNT_CREATE_SOFTERR)
492 clnt->cl_softerr = 1;
493 }
489 494
490 if (args->flags & RPC_CLNT_CREATE_AUTOBIND) 495 if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
491 clnt->cl_autobind = 1; 496 clnt->cl_autobind = 1;
@@ -623,10 +628,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
623 /* Turn off autobind on clones */ 628 /* Turn off autobind on clones */
624 new->cl_autobind = 0; 629 new->cl_autobind = 0;
625 new->cl_softrtry = clnt->cl_softrtry; 630 new->cl_softrtry = clnt->cl_softrtry;
631 new->cl_softerr = clnt->cl_softerr;
626 new->cl_noretranstimeo = clnt->cl_noretranstimeo; 632 new->cl_noretranstimeo = clnt->cl_noretranstimeo;
627 new->cl_discrtry = clnt->cl_discrtry; 633 new->cl_discrtry = clnt->cl_discrtry;
628 new->cl_chatty = clnt->cl_chatty; 634 new->cl_chatty = clnt->cl_chatty;
629 new->cl_principal = clnt->cl_principal; 635 new->cl_principal = clnt->cl_principal;
636 new->cl_cred = get_cred(clnt->cl_cred);
630 return new; 637 return new;
631 638
632out_err: 639out_err:
@@ -648,6 +655,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
648 .prognumber = clnt->cl_prog, 655 .prognumber = clnt->cl_prog,
649 .version = clnt->cl_vers, 656 .version = clnt->cl_vers,
650 .authflavor = clnt->cl_auth->au_flavor, 657 .authflavor = clnt->cl_auth->au_flavor,
658 .cred = clnt->cl_cred,
651 }; 659 };
652 return __rpc_clone_client(&args, clnt); 660 return __rpc_clone_client(&args, clnt);
653} 661}
@@ -669,6 +677,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
669 .prognumber = clnt->cl_prog, 677 .prognumber = clnt->cl_prog,
670 .version = clnt->cl_vers, 678 .version = clnt->cl_vers,
671 .authflavor = flavor, 679 .authflavor = flavor,
680 .cred = clnt->cl_cred,
672 }; 681 };
673 return __rpc_clone_client(&args, clnt); 682 return __rpc_clone_client(&args, clnt);
674} 683}
@@ -827,14 +836,8 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
827 * Spin lock all_tasks to prevent changes... 836 * Spin lock all_tasks to prevent changes...
828 */ 837 */
829 spin_lock(&clnt->cl_lock); 838 spin_lock(&clnt->cl_lock);
830 list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { 839 list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
831 if (!RPC_IS_ACTIVATED(rovr)) 840 rpc_signal_task(rovr);
832 continue;
833 if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
834 rovr->tk_flags |= RPC_TASK_KILLED;
835 rpc_exit(rovr, -EIO);
836 }
837 }
838 spin_unlock(&clnt->cl_lock); 841 spin_unlock(&clnt->cl_lock);
839} 842}
840EXPORT_SYMBOL_GPL(rpc_killall_tasks); 843EXPORT_SYMBOL_GPL(rpc_killall_tasks);
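
[Note] rpc_killall_tasks() no longer forces each task straight into rpc_exit(-EIO); it signals them. rpc_signal_task(), declared in the sched.h hunk above, sets the RPC_TASK_SIGNALLED run-state bit and wakes the task, and every site that used to test the removed RPC_TASK_KILLED flag now checks RPC_SIGNALLED(), as in the lockd and nfsd hunks earlier. Sketch of the consumer side (illustrative callback):

static void example_callback(struct rpc_task *task, void *data)
{
	if (RPC_SIGNALLED(task))	/* killed via rpc_signal_task() */
		return;			/* unwind without retrying */
	if (task->tk_status < 0)
		rpc_restart_call(task);	/* otherwise eligible for retry */
}
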
@@ -882,6 +885,7 @@ rpc_free_client(struct rpc_clnt *clnt)
882 xprt_put(rcu_dereference_raw(clnt->cl_xprt)); 885 xprt_put(rcu_dereference_raw(clnt->cl_xprt));
883 xprt_iter_destroy(&clnt->cl_xpi); 886 xprt_iter_destroy(&clnt->cl_xpi);
884 rpciod_down(); 887 rpciod_down();
888 put_cred(clnt->cl_cred);
885 rpc_free_clid(clnt); 889 rpc_free_clid(clnt);
886 kfree(clnt); 890 kfree(clnt);
887 return parent; 891 return parent;
@@ -946,6 +950,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
946 .prognumber = program->number, 950 .prognumber = program->number,
947 .version = vers, 951 .version = vers,
948 .authflavor = old->cl_auth->au_flavor, 952 .authflavor = old->cl_auth->au_flavor,
953 .cred = old->cl_cred,
949 }; 954 };
950 struct rpc_clnt *clnt; 955 struct rpc_clnt *clnt;
951 int err; 956 int err;
@@ -1007,6 +1012,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
1007 atomic_inc(&clnt->cl_count); 1012 atomic_inc(&clnt->cl_count);
1008 if (clnt->cl_softrtry) 1013 if (clnt->cl_softrtry)
1009 task->tk_flags |= RPC_TASK_SOFT; 1014 task->tk_flags |= RPC_TASK_SOFT;
1015 if (clnt->cl_softerr)
1016 task->tk_flags |= RPC_TASK_TIMEOUT;
1010 if (clnt->cl_noretranstimeo) 1017 if (clnt->cl_noretranstimeo)
1011 task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; 1018 task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
1012 if (atomic_read(&clnt->cl_swapper)) 1019 if (atomic_read(&clnt->cl_swapper))
@@ -1470,22 +1477,14 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
1470} 1477}
1471EXPORT_SYMBOL_GPL(rpc_force_rebind); 1478EXPORT_SYMBOL_GPL(rpc_force_rebind);
1472 1479
1473/* 1480static int
1474 * Restart an (async) RPC call from the call_prepare state. 1481__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *))
1475 * Usually called from within the exit handler.
1476 */
1477int
1478rpc_restart_call_prepare(struct rpc_task *task)
1479{ 1482{
1480 if (RPC_ASSASSINATED(task))
1481 return 0;
1482 task->tk_action = call_start;
1483 task->tk_status = 0; 1483 task->tk_status = 0;
1484 if (task->tk_ops->rpc_call_prepare != NULL) 1484 task->tk_rpc_status = 0;
1485 task->tk_action = rpc_prepare_task; 1485 task->tk_action = action;
1486 return 1; 1486 return 1;
1487} 1487}
1488EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
1489 1488
1490/* 1489/*
1491 * Restart an (async) RPC call. Usually called from within the 1490 * Restart an (async) RPC call. Usually called from within the
@@ -1494,14 +1493,23 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
1494int 1493int
1495rpc_restart_call(struct rpc_task *task) 1494rpc_restart_call(struct rpc_task *task)
1496{ 1495{
1497 if (RPC_ASSASSINATED(task)) 1496 return __rpc_restart_call(task, call_start);
1498 return 0;
1499 task->tk_action = call_start;
1500 task->tk_status = 0;
1501 return 1;
1502} 1497}
1503EXPORT_SYMBOL_GPL(rpc_restart_call); 1498EXPORT_SYMBOL_GPL(rpc_restart_call);
1504 1499
1500/*
1501 * Restart an (async) RPC call from the call_prepare state.
1502 * Usually called from within the exit handler.
1503 */
1504int
1505rpc_restart_call_prepare(struct rpc_task *task)
1506{
1507 if (task->tk_ops->rpc_call_prepare != NULL)
1508 return __rpc_restart_call(task, rpc_prepare_task);
1509 return rpc_restart_call(task);
1510}
1511EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
1512
1505const char 1513const char
1506*rpc_proc_name(const struct rpc_task *task) 1514*rpc_proc_name(const struct rpc_task *task)
1507{ 1515{
@@ -1516,6 +1524,19 @@ const char
1516 return "no proc"; 1524 return "no proc";
1517} 1525}
1518 1526
1527static void
1528__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
1529{
1530 task->tk_rpc_status = rpc_status;
1531 rpc_exit(task, tk_status);
1532}
1533
1534static void
1535rpc_call_rpcerror(struct rpc_task *task, int status)
1536{
1537 __rpc_call_rpcerror(task, status, status);
1538}
1539
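
[Note] rpc_call_rpcerror() pairs the familiar tk_status with the new tk_rpc_status: tk_status is what state-machine callbacks see and may be rewritten while the task unwinds, while tk_rpc_status preserves the fatal RPC-level error. The soft-timeout hunk below makes the split visible: the caller gets -EIO in tk_status while tk_rpc_status records -ETIMEDOUT. Sketch of a completion-side check:

/* Sketch: distinguishing "why the RPC died" from the posix status. */
static void example_rpc_done(struct rpc_task *task, void *calldata)
{
	if (task->tk_rpc_status == -ETIMEDOUT)
		pr_debug("request timed out (tk_status=%d)\n",
			 task->tk_status);
}
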
1519/* 1540/*
1520 * 0. Initial state 1541 * 0. Initial state
1521 * 1542 *
@@ -1580,7 +1601,7 @@ call_reserveresult(struct rpc_task *task)
1580 1601
1581 printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", 1602 printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
1582 __func__, status); 1603 __func__, status);
1583 rpc_exit(task, -EIO); 1604 rpc_call_rpcerror(task, -EIO);
1584 return; 1605 return;
1585 } 1606 }
1586 1607
@@ -1608,7 +1629,7 @@ call_reserveresult(struct rpc_task *task)
1608 __func__, status); 1629 __func__, status);
1609 break; 1630 break;
1610 } 1631 }
1611 rpc_exit(task, status); 1632 rpc_call_rpcerror(task, status);
1612} 1633}
1613 1634
1614/* 1635/*
@@ -1676,7 +1697,7 @@ call_refreshresult(struct rpc_task *task)
1676 } 1697 }
1677 dprintk("RPC: %5u %s: refresh creds failed with error %d\n", 1698 dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
1678 task->tk_pid, __func__, status); 1699 task->tk_pid, __func__, status);
1679 rpc_exit(task, status); 1700 rpc_call_rpcerror(task, status);
1680} 1701}
1681 1702
1682/* 1703/*
@@ -1727,7 +1748,7 @@ call_allocate(struct rpc_task *task)
1727 if (status == 0) 1748 if (status == 0)
1728 return; 1749 return;
1729 if (status != -ENOMEM) { 1750 if (status != -ENOMEM) {
1730 rpc_exit(task, status); 1751 rpc_call_rpcerror(task, status);
1731 return; 1752 return;
1732 } 1753 }
1733 1754
@@ -1793,10 +1814,17 @@ call_encode(struct rpc_task *task)
1793 rpc_delay(task, HZ >> 4); 1814 rpc_delay(task, HZ >> 4);
1794 break; 1815 break;
1795 case -EKEYEXPIRED: 1816 case -EKEYEXPIRED:
1796 task->tk_action = call_refresh; 1817 if (!task->tk_cred_retry) {
1818 rpc_exit(task, task->tk_status);
1819 } else {
1820 task->tk_action = call_refresh;
1821 task->tk_cred_retry--;
1822 dprintk("RPC: %5u %s: retry refresh creds\n",
1823 task->tk_pid, __func__);
1824 }
1797 break; 1825 break;
1798 default: 1826 default:
1799 rpc_exit(task, task->tk_status); 1827 rpc_call_rpcerror(task, task->tk_status);
1800 } 1828 }
1801 return; 1829 return;
1802 } else { 1830 } else {
@@ -1857,7 +1885,6 @@ call_bind(struct rpc_task *task)
1857 if (!xprt_prepare_transmit(task)) 1885 if (!xprt_prepare_transmit(task))
1858 return; 1886 return;
1859 1887
1860 task->tk_timeout = xprt->bind_timeout;
1861 xprt->ops->rpcbind(task); 1888 xprt->ops->rpcbind(task);
1862} 1889}
1863 1890
@@ -1938,7 +1965,7 @@ call_bind_status(struct rpc_task *task)
1938 task->tk_pid, -task->tk_status); 1965 task->tk_pid, -task->tk_status);
1939 } 1966 }
1940 1967
1941 rpc_exit(task, status); 1968 rpc_call_rpcerror(task, status);
1942 return; 1969 return;
1943 1970
1944retry_timeout: 1971retry_timeout:
@@ -1973,7 +2000,7 @@ call_connect(struct rpc_task *task)
1973 if (task->tk_status < 0) 2000 if (task->tk_status < 0)
1974 return; 2001 return;
1975 if (task->tk_flags & RPC_TASK_NOCONNECT) { 2002 if (task->tk_flags & RPC_TASK_NOCONNECT) {
1976 rpc_exit(task, -ENOTCONN); 2003 rpc_call_rpcerror(task, -ENOTCONN);
1977 return; 2004 return;
1978 } 2005 }
1979 if (!xprt_prepare_transmit(task)) 2006 if (!xprt_prepare_transmit(task))
@@ -2033,7 +2060,7 @@ call_connect_status(struct rpc_task *task)
2033 task->tk_action = call_transmit; 2060 task->tk_action = call_transmit;
2034 return; 2061 return;
2035 } 2062 }
2036 rpc_exit(task, status); 2063 rpc_call_rpcerror(task, status);
2037 return; 2064 return;
2038out_retry: 2065out_retry:
2039 /* Check for timeouts before looping back to call_bind */ 2066 /* Check for timeouts before looping back to call_bind */
@@ -2118,7 +2145,7 @@ call_transmit_status(struct rpc_task *task)
2118 if (!task->tk_msg.rpc_proc->p_proc) 2145 if (!task->tk_msg.rpc_proc->p_proc)
2119 trace_xprt_ping(task->tk_xprt, 2146 trace_xprt_ping(task->tk_xprt,
2120 task->tk_status); 2147 task->tk_status);
2121 rpc_exit(task, task->tk_status); 2148 rpc_call_rpcerror(task, task->tk_status);
2122 return; 2149 return;
2123 } 2150 }
2124 /* fall through */ 2151 /* fall through */
@@ -2282,7 +2309,7 @@ call_status(struct rpc_task *task)
2282 rpc_check_timeout(task); 2309 rpc_check_timeout(task);
2283 return; 2310 return;
2284out_exit: 2311out_exit:
2285 rpc_exit(task, status); 2312 rpc_call_rpcerror(task, status);
2286} 2313}
2287 2314
2288static bool 2315static bool
@@ -2306,29 +2333,40 @@ rpc_check_timeout(struct rpc_task *task)
2306 task->tk_timeouts++; 2333 task->tk_timeouts++;
2307 2334
2308 if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { 2335 if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
2309 rpc_exit(task, -ETIMEDOUT); 2336 rpc_call_rpcerror(task, -ETIMEDOUT);
2310 return; 2337 return;
2311 } 2338 }
2312 2339
2313 if (RPC_IS_SOFT(task)) { 2340 if (RPC_IS_SOFT(task)) {
2341 /*
2342 * Once a "no retrans timeout" soft tasks (a.k.a NFSv4) has
2343 * been sent, it should time out only if the transport
2344 * connection gets terminally broken.
2345 */
2346 if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
2347 rpc_check_connected(task->tk_rqstp))
2348 return;
2349
2314 if (clnt->cl_chatty) { 2350 if (clnt->cl_chatty) {
2315 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 2351 pr_notice_ratelimited(
2352 "%s: server %s not responding, timed out\n",
2316 clnt->cl_program->name, 2353 clnt->cl_program->name,
2317 task->tk_xprt->servername); 2354 task->tk_xprt->servername);
2318 } 2355 }
2319 if (task->tk_flags & RPC_TASK_TIMEOUT) 2356 if (task->tk_flags & RPC_TASK_TIMEOUT)
2320 rpc_exit(task, -ETIMEDOUT); 2357 rpc_call_rpcerror(task, -ETIMEDOUT);
2321 else 2358 else
2322 rpc_exit(task, -EIO); 2359 __rpc_call_rpcerror(task, -EIO, -ETIMEDOUT);
2323 return; 2360 return;
2324 } 2361 }
2325 2362
2326 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { 2363 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
2327 task->tk_flags |= RPC_CALL_MAJORSEEN; 2364 task->tk_flags |= RPC_CALL_MAJORSEEN;
2328 if (clnt->cl_chatty) { 2365 if (clnt->cl_chatty) {
2329 printk(KERN_NOTICE "%s: server %s not responding, still trying\n", 2366 pr_notice_ratelimited(
2330 clnt->cl_program->name, 2367 "%s: server %s not responding, still trying\n",
2331 task->tk_xprt->servername); 2368 clnt->cl_program->name,
2369 task->tk_xprt->servername);
2332 } 2370 }
2333 } 2371 }
2334 rpc_force_rebind(clnt); 2372 rpc_force_rebind(clnt);
@@ -2358,7 +2396,7 @@ call_decode(struct rpc_task *task)
2358 2396
2359 if (task->tk_flags & RPC_CALL_MAJORSEEN) { 2397 if (task->tk_flags & RPC_CALL_MAJORSEEN) {
2360 if (clnt->cl_chatty) { 2398 if (clnt->cl_chatty) {
2361 printk(KERN_NOTICE "%s: server %s OK\n", 2399 pr_notice_ratelimited("%s: server %s OK\n",
2362 clnt->cl_program->name, 2400 clnt->cl_program->name,
2363 task->tk_xprt->servername); 2401 task->tk_xprt->servername);
2364 } 2402 }
@@ -2881,7 +2919,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
2881 2919
2882 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", 2920 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
2883 task->tk_pid, task->tk_flags, task->tk_status, 2921 task->tk_pid, task->tk_flags, task->tk_status,
2884 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, 2922 clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops,
2885 clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), 2923 clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
2886 task->tk_action, rpc_waitq); 2924 task->tk_action, rpc_waitq);
2887} 2925}
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 19bb356230ed..95ebd76b132d 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -33,7 +33,7 @@ tasks_show(struct seq_file *f, void *v)
33 33
34 seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n", 34 seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n",
35 task->tk_pid, task->tk_flags, task->tk_status, 35 task->tk_pid, task->tk_flags, task->tk_status,
36 clnt->cl_clid, xid, task->tk_timeout, task->tk_ops, 36 clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops,
37 clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), 37 clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
38 task->tk_action, rpc_waitq); 38 task->tk_action, rpc_waitq);
39 return 0; 39 return 0;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 41a971ac1c63..2277b7cdad27 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net)
240 .program = &rpcb_program, 240 .program = &rpcb_program,
241 .version = RPCBVERS_2, 241 .version = RPCBVERS_2,
242 .authflavor = RPC_AUTH_NULL, 242 .authflavor = RPC_AUTH_NULL,
243 .cred = current_cred(),
243 /* 244 /*
244 * We turn off the idle timeout to prevent the kernel 245 * We turn off the idle timeout to prevent the kernel
245 * from automatically disconnecting the socket. 246 * from automatically disconnecting the socket.
@@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net)
299 .program = &rpcb_program, 300 .program = &rpcb_program,
300 .version = RPCBVERS_2, 301 .version = RPCBVERS_2,
301 .authflavor = RPC_AUTH_UNIX, 302 .authflavor = RPC_AUTH_UNIX,
303 .cred = current_cred(),
302 .flags = RPC_CLNT_CREATE_NOPING, 304 .flags = RPC_CLNT_CREATE_NOPING,
303 }; 305 };
304 struct rpc_clnt *clnt, *clnt4; 306 struct rpc_clnt *clnt, *clnt4;
@@ -358,7 +360,8 @@ out:
358static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, 360static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
359 const char *hostname, 361 const char *hostname,
360 struct sockaddr *srvaddr, size_t salen, 362 struct sockaddr *srvaddr, size_t salen,
361 int proto, u32 version) 363 int proto, u32 version,
364 const struct cred *cred)
362{ 365{
363 struct rpc_create_args args = { 366 struct rpc_create_args args = {
364 .net = net, 367 .net = net,
@@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
370 .program = &rpcb_program, 373 .program = &rpcb_program,
371 .version = version, 374 .version = version,
372 .authflavor = RPC_AUTH_UNIX, 375 .authflavor = RPC_AUTH_UNIX,
376 .cred = cred,
373 .flags = (RPC_CLNT_CREATE_NOPING | 377 .flags = (RPC_CLNT_CREATE_NOPING |
374 RPC_CLNT_CREATE_NONPRIVPORT), 378 RPC_CLNT_CREATE_NONPRIVPORT),
375 }; 379 };
@@ -694,7 +698,8 @@ void rpcb_getport_async(struct rpc_task *task)
694 698
695 /* Put self on the wait queue to ensure we get notified if 699 /* Put self on the wait queue to ensure we get notified if
696 * some other task is already attempting to bind the port */ 700 * some other task is already attempting to bind the port */
697 rpc_sleep_on(&xprt->binding, task, NULL); 701 rpc_sleep_on_timeout(&xprt->binding, task,
702 NULL, jiffies + xprt->bind_timeout);
698 703
699 if (xprt_test_and_set_binding(xprt)) { 704 if (xprt_test_and_set_binding(xprt)) {
700 dprintk("RPC: %5u %s: waiting for another binder\n", 705 dprintk("RPC: %5u %s: waiting for another binder\n",
@@ -744,7 +749,8 @@ void rpcb_getport_async(struct rpc_task *task)
744 rpcb_clnt = rpcb_create(xprt->xprt_net, 749 rpcb_clnt = rpcb_create(xprt->xprt_net,
745 clnt->cl_nodename, 750 clnt->cl_nodename,
746 xprt->servername, sap, salen, 751 xprt->servername, sap, salen,
747 xprt->prot, bind_version); 752 xprt->prot, bind_version,
753 clnt->cl_cred);
748 if (IS_ERR(rpcb_clnt)) { 754 if (IS_ERR(rpcb_clnt)) {
749 status = PTR_ERR(rpcb_clnt); 755 status = PTR_ERR(rpcb_clnt);
750 dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", 756 dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 28956c70100a..1a12fb03e611 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -58,6 +58,20 @@ static struct rpc_wait_queue delay_queue;
58struct workqueue_struct *rpciod_workqueue __read_mostly; 58struct workqueue_struct *rpciod_workqueue __read_mostly;
59struct workqueue_struct *xprtiod_workqueue __read_mostly; 59struct workqueue_struct *xprtiod_workqueue __read_mostly;
60 60
61unsigned long
62rpc_task_timeout(const struct rpc_task *task)
63{
64 unsigned long timeout = READ_ONCE(task->tk_timeout);
65
66 if (timeout != 0) {
67 unsigned long now = jiffies;
68 if (time_before(now, timeout))
69 return timeout - now;
70 }
71 return 0;
72}
73EXPORT_SYMBOL_GPL(rpc_task_timeout);
74
61/* 75/*
62 * Disable the timer for a given RPC task. Should be called with 76 * Disable the timer for a given RPC task. Should be called with
63 * queue->lock and bh_disabled in order to avoid races within 77 * queue->lock and bh_disabled in order to avoid races within
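Note: rpc_task_timeout() exists because tk_timeout changes meaning in this patch: it used to hold a relative interval and now holds the absolute jiffies value at which the task's timer fires (zero when no timer is armed). Code that only reports the timeout, such as rpc_show_task() and the debugfs tasks file converted earlier, should therefore print the time remaining, e.g.:

        /* Time left before the task's timer fires (0 if no timer is
         * pending or it has already expired); ms only for display. */
        unsigned long left_ms = jiffies_to_msecs(rpc_task_timeout(task));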
@@ -66,7 +80,7 @@ struct workqueue_struct *xprtiod_workqueue __read_mostly;
66static void 80static void
67__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) 81__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
68{ 82{
69 if (task->tk_timeout == 0) 83 if (list_empty(&task->u.tk_wait.timer_list))
70 return; 84 return;
71 dprintk("RPC: %5u disabling timer\n", task->tk_pid); 85 dprintk("RPC: %5u disabling timer\n", task->tk_pid);
72 task->tk_timeout = 0; 86 task->tk_timeout = 0;
@@ -78,25 +92,21 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
78static void 92static void
79rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) 93rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
80{ 94{
81 queue->timer_list.expires = expires; 95 timer_reduce(&queue->timer_list.timer, expires);
82 mod_timer(&queue->timer_list.timer, expires);
83} 96}
84 97
85/* 98/*
86 * Set up a timer for the current task. 99 * Set up a timer for the current task.
87 */ 100 */
88static void 101static void
89__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) 102__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
103 unsigned long timeout)
90{ 104{
91 if (!task->tk_timeout)
92 return;
93
94 dprintk("RPC: %5u setting alarm for %u ms\n", 105 dprintk("RPC: %5u setting alarm for %u ms\n",
95 task->tk_pid, jiffies_to_msecs(task->tk_timeout)); 106 task->tk_pid, jiffies_to_msecs(timeout - jiffies));
96 107
97 task->u.tk_wait.expires = jiffies + task->tk_timeout; 108 task->tk_timeout = timeout;
98 if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) 109 rpc_set_queue_timer(queue, timeout);
99 rpc_set_queue_timer(queue, task->u.tk_wait.expires);
100 list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); 110 list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
101} 111}
102 112
@@ -188,6 +198,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
188 if (RPC_IS_QUEUED(task)) 198 if (RPC_IS_QUEUED(task))
189 return; 199 return;
190 200
201 INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
191 if (RPC_IS_PRIORITY(queue)) 202 if (RPC_IS_PRIORITY(queue))
192 __rpc_add_wait_queue_priority(queue, task, queue_priority); 203 __rpc_add_wait_queue_priority(queue, task, queue_priority);
193 else if (RPC_IS_SWAPPER(task)) 204 else if (RPC_IS_SWAPPER(task))
@@ -238,7 +249,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
238 queue->maxpriority = nr_queues - 1; 249 queue->maxpriority = nr_queues - 1;
239 rpc_reset_waitqueue_priority(queue); 250 rpc_reset_waitqueue_priority(queue);
240 queue->qlen = 0; 251 queue->qlen = 0;
241 timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0); 252 timer_setup(&queue->timer_list.timer,
253 __rpc_queue_timer_fn,
254 TIMER_DEFERRABLE);
242 INIT_LIST_HEAD(&queue->timer_list.list); 255 INIT_LIST_HEAD(&queue->timer_list.list);
243 rpc_assign_waitqueue_name(queue, qname); 256 rpc_assign_waitqueue_name(queue, qname);
244} 257}
@@ -362,7 +375,6 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
362 */ 375 */
363static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, 376static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
364 struct rpc_task *task, 377 struct rpc_task *task,
365 rpc_action action,
366 unsigned char queue_priority) 378 unsigned char queue_priority)
367{ 379{
368 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", 380 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
@@ -372,47 +384,100 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
372 384
373 __rpc_add_wait_queue(q, task, queue_priority); 385 __rpc_add_wait_queue(q, task, queue_priority);
374 386
375 WARN_ON_ONCE(task->tk_callback != NULL);
376 task->tk_callback = action;
377 __rpc_add_timer(q, task);
378} 387}
379 388
380void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, 389static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
381 rpc_action action) 390 struct rpc_task *task, unsigned long timeout,
391 unsigned char queue_priority)
392{
393 if (time_is_after_jiffies(timeout)) {
394 __rpc_sleep_on_priority(q, task, queue_priority);
395 __rpc_add_timer(q, task, timeout);
396 } else
397 task->tk_status = -ETIMEDOUT;
398}
399
400static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
401{
402 if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
403 task->tk_callback = action;
404}
405
406static bool rpc_sleep_check_activated(struct rpc_task *task)
382{ 407{
383 /* We shouldn't ever put an inactive task to sleep */ 408 /* We shouldn't ever put an inactive task to sleep */
384 WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); 409 if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
385 if (!RPC_IS_ACTIVATED(task)) {
386 task->tk_status = -EIO; 410 task->tk_status = -EIO;
387 rpc_put_task_async(task); 411 rpc_put_task_async(task);
388 return; 412 return false;
389 } 413 }
414 return true;
415}
416
417void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
418 rpc_action action, unsigned long timeout)
419{
420 if (!rpc_sleep_check_activated(task))
421 return;
422
423 rpc_set_tk_callback(task, action);
424
425 /*
426 * Protect the queue operations.
427 */
428 spin_lock_bh(&q->lock);
429 __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
430 spin_unlock_bh(&q->lock);
431}
432EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
390 433
434void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
435 rpc_action action)
436{
437 if (!rpc_sleep_check_activated(task))
438 return;
439
440 rpc_set_tk_callback(task, action);
441
442 WARN_ON_ONCE(task->tk_timeout != 0);
391 /* 443 /*
392 * Protect the queue operations. 444 * Protect the queue operations.
393 */ 445 */
394 spin_lock_bh(&q->lock); 446 spin_lock_bh(&q->lock);
395 __rpc_sleep_on_priority(q, task, action, task->tk_priority); 447 __rpc_sleep_on_priority(q, task, task->tk_priority);
396 spin_unlock_bh(&q->lock); 448 spin_unlock_bh(&q->lock);
397} 449}
398EXPORT_SYMBOL_GPL(rpc_sleep_on); 450EXPORT_SYMBOL_GPL(rpc_sleep_on);
399 451
452void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
453 struct rpc_task *task, unsigned long timeout, int priority)
454{
455 if (!rpc_sleep_check_activated(task))
456 return;
457
458 priority -= RPC_PRIORITY_LOW;
459 /*
460 * Protect the queue operations.
461 */
462 spin_lock_bh(&q->lock);
463 __rpc_sleep_on_priority_timeout(q, task, timeout, priority);
464 spin_unlock_bh(&q->lock);
465}
466EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
467
400void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, 468void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
401 rpc_action action, int priority) 469 int priority)
402{ 470{
403 /* We shouldn't ever put an inactive task to sleep */ 471 if (!rpc_sleep_check_activated(task))
404 WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
405 if (!RPC_IS_ACTIVATED(task)) {
406 task->tk_status = -EIO;
407 rpc_put_task_async(task);
408 return; 472 return;
409 }
410 473
474 WARN_ON_ONCE(task->tk_timeout != 0);
475 priority -= RPC_PRIORITY_LOW;
411 /* 476 /*
412 * Protect the queue operations. 477 * Protect the queue operations.
413 */ 478 */
414 spin_lock_bh(&q->lock); 479 spin_lock_bh(&q->lock);
415 __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); 480 __rpc_sleep_on_priority(q, task, priority);
416 spin_unlock_bh(&q->lock); 481 spin_unlock_bh(&q->lock);
417} 482}
418EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); 483EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
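Note: after this hunk the sleep API comes in four variants: rpc_sleep_on() and rpc_sleep_on_priority() for untimed waits (now guarded by WARN_ON_ONCE(task->tk_timeout != 0)), and the *_timeout() forms, which arm a timer at an absolute deadline and fail the task immediately with -ETIMEDOUT when that deadline has already passed (see __rpc_sleep_on_priority_timeout() above). Typical usage, mirroring the conversions in net/sunrpc/xprt.c below:

        /* Bounded wait: xprt_timer fires if no wakeup arrives in time. */
        rpc_sleep_on_timeout(&xprt->pending, task, xprt_timer,
                             jiffies + req->rq_timeout);

        /* Unbounded wait: woken explicitly by another task. */
        rpc_sleep_on(&xprt->sending, task, NULL);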
@@ -704,7 +769,7 @@ static void __rpc_queue_timer_fn(struct timer_list *t)
704 spin_lock(&queue->lock); 769 spin_lock(&queue->lock);
705 expires = now = jiffies; 770 expires = now = jiffies;
706 list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { 771 list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
707 timeo = task->u.tk_wait.expires; 772 timeo = task->tk_timeout;
708 if (time_after_eq(now, timeo)) { 773 if (time_after_eq(now, timeo)) {
709 dprintk("RPC: %5u timeout\n", task->tk_pid); 774 dprintk("RPC: %5u timeout\n", task->tk_pid);
710 task->tk_status = -ETIMEDOUT; 775 task->tk_status = -ETIMEDOUT;
@@ -730,8 +795,7 @@ static void __rpc_atrun(struct rpc_task *task)
730 */ 795 */
731void rpc_delay(struct rpc_task *task, unsigned long delay) 796void rpc_delay(struct rpc_task *task, unsigned long delay)
732{ 797{
733 task->tk_timeout = delay; 798 rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
734 rpc_sleep_on(&delay_queue, task, __rpc_atrun);
735} 799}
736EXPORT_SYMBOL_GPL(rpc_delay); 800EXPORT_SYMBOL_GPL(rpc_delay);
737 801
@@ -759,8 +823,7 @@ static void
759rpc_reset_task_statistics(struct rpc_task *task) 823rpc_reset_task_statistics(struct rpc_task *task)
760{ 824{
761 task->tk_timeouts = 0; 825 task->tk_timeouts = 0;
762 task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT); 826 task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
763
764 rpc_init_task_statistics(task); 827 rpc_init_task_statistics(task);
765} 828}
766 829
@@ -773,7 +836,6 @@ void rpc_exit_task(struct rpc_task *task)
773 if (task->tk_ops->rpc_call_done != NULL) { 836 if (task->tk_ops->rpc_call_done != NULL) {
774 task->tk_ops->rpc_call_done(task, task->tk_calldata); 837 task->tk_ops->rpc_call_done(task, task->tk_calldata);
775 if (task->tk_action != NULL) { 838 if (task->tk_action != NULL) {
776 WARN_ON(RPC_ASSASSINATED(task));
777 /* Always release the RPC slot and buffer memory */ 839 /* Always release the RPC slot and buffer memory */
778 xprt_release(task); 840 xprt_release(task);
779 rpc_reset_task_statistics(task); 841 rpc_reset_task_statistics(task);
@@ -781,6 +843,19 @@ void rpc_exit_task(struct rpc_task *task)
781 } 843 }
782} 844}
783 845
846void rpc_signal_task(struct rpc_task *task)
847{
848 struct rpc_wait_queue *queue;
849
850 if (!RPC_IS_ACTIVATED(task))
851 return;
852 set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
853 smp_mb__after_atomic();
854 queue = READ_ONCE(task->tk_waitqueue);
855 if (queue)
856 rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
857}
858
784void rpc_exit(struct rpc_task *task, int status) 859void rpc_exit(struct rpc_task *task, int status)
785{ 860{
786 task->tk_status = status; 861 task->tk_status = status;
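Note: rpc_signal_task() replaces the RPC_TASK_KILLED/RPC_ASSASSINATED() mechanism this patch removes. The signal bit lives in tk_runstate rather than tk_flags so it can be set atomically from another context; smp_mb__after_atomic() orders the flag write before the waitqueue load, and the task is woken with -ERESTARTSYS. The matching predicate comes from the sunrpc/sched.h hunk (in the diffstat but not shown in this section), presumably along the lines of:

        #define RPC_SIGNALLED(t) \
                test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)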
@@ -836,6 +911,13 @@ static void __rpc_execute(struct rpc_task *task)
836 */ 911 */
837 if (!RPC_IS_QUEUED(task)) 912 if (!RPC_IS_QUEUED(task))
838 continue; 913 continue;
914
915 /*
916 * Signalled tasks should exit rather than sleep.
917 */
918 if (RPC_SIGNALLED(task))
919 rpc_exit(task, -ERESTARTSYS);
920
839 /* 921 /*
840 * The queue->lock protects against races with 922 * The queue->lock protects against races with
841 * rpc_make_runnable(). 923 * rpc_make_runnable().
@@ -861,7 +943,7 @@ static void __rpc_execute(struct rpc_task *task)
861 status = out_of_line_wait_on_bit(&task->tk_runstate, 943 status = out_of_line_wait_on_bit(&task->tk_runstate,
862 RPC_TASK_QUEUED, rpc_wait_bit_killable, 944 RPC_TASK_QUEUED, rpc_wait_bit_killable,
863 TASK_KILLABLE); 945 TASK_KILLABLE);
864 if (status == -ERESTARTSYS) { 946 if (status < 0) {
865 /* 947 /*
866 * When a sync task receives a signal, it exits with 948 * When a sync task receives a signal, it exits with
867 * -ERESTARTSYS. In order to catch any callbacks that 949 * -ERESTARTSYS. In order to catch any callbacks that
@@ -869,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task)
869 * break the loop here, but go around once more. 951 * break the loop here, but go around once more.
870 */ 952 */
871 dprintk("RPC: %5u got signal\n", task->tk_pid); 953 dprintk("RPC: %5u got signal\n", task->tk_pid);
872 task->tk_flags |= RPC_TASK_KILLED; 954 set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
873 rpc_exit(task, -ERESTARTSYS); 955 rpc_exit(task, -ERESTARTSYS);
874 } 956 }
875 dprintk("RPC: %5u sync task resuming\n", task->tk_pid); 957 dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 7e55cfc69697..9faea12624a6 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -106,7 +106,7 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb
106 /* ACL likes to be lazy in allocating pages - ACLs 106 /* ACL likes to be lazy in allocating pages - ACLs
107 * are small by default but can get huge. */ 107 * are small by default but can get huge. */
108 if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) { 108 if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
109 *ppage = alloc_page(GFP_ATOMIC); 109 *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
110 if (unlikely(*ppage == NULL)) { 110 if (unlikely(*ppage == NULL)) {
111 if (copied == 0) 111 if (copied == 0)
112 copied = -ENOMEM; 112 copied = -ENOMEM;
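Note: GFP_ATOMIC and GFP_NOWAIT both forbid sleeping, but GFP_ATOMIC may also dip into emergency reserves. These lazily allocated ACL receive pages do not justify reserves, since a failed allocation is handled gracefully, and __GFP_NOWARN suppresses the allocation-failure warning too. The resulting pattern, paraphrasing the hunk above:

        /* Opportunistic, quiet allocation: no sleeping, no reserves,
         * no warning -- the caller copes with a NULL page. */
        page = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
        if (!page)
                return copied ? copied : -ENOMEM;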
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d7117d241460..a9d40bc7ebed 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -73,6 +73,15 @@ static void xprt_destroy(struct rpc_xprt *xprt);
73static DEFINE_SPINLOCK(xprt_list_lock); 73static DEFINE_SPINLOCK(xprt_list_lock);
74static LIST_HEAD(xprt_list); 74static LIST_HEAD(xprt_list);
75 75
76static unsigned long xprt_request_timeout(const struct rpc_rqst *req)
77{
78 unsigned long timeout = jiffies + req->rq_timeout;
79
80 if (time_before(timeout, req->rq_majortimeo))
81 return timeout;
82 return req->rq_majortimeo;
83}
84
76/** 85/**
77 * xprt_register_transport - register a transport implementation 86 * xprt_register_transport - register a transport implementation
78 * @transport: transport to register 87 * @transport: transport to register
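Note: xprt_request_timeout() supplies the absolute deadline for the rpc_sleep_on_timeout() conversions in this file: the next per-retransmission (minor) timeout, clamped so it never extends past the request's major timeout. Illustrative numbers:

        /* Hypothetical: rq_timeout = 15 s but rq_majortimeo expires in
         * 5 s. The earlier deadline wins, so the task sleeps at most
         * 5 s before xprt_timer runs. */
        unsigned long deadline = xprt_request_timeout(req);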
@@ -209,9 +218,12 @@ out_unlock:
209out_sleep: 218out_sleep:
210 dprintk("RPC: %5u failed to lock transport %p\n", 219 dprintk("RPC: %5u failed to lock transport %p\n",
211 task->tk_pid, xprt); 220 task->tk_pid, xprt);
212 task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
213 task->tk_status = -EAGAIN; 221 task->tk_status = -EAGAIN;
214 rpc_sleep_on(&xprt->sending, task, NULL); 222 if (RPC_IS_SOFT(task))
223 rpc_sleep_on_timeout(&xprt->sending, task, NULL,
224 xprt_request_timeout(req));
225 else
226 rpc_sleep_on(&xprt->sending, task, NULL);
215 return 0; 227 return 0;
216} 228}
217EXPORT_SYMBOL_GPL(xprt_reserve_xprt); 229EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -273,9 +285,12 @@ out_unlock:
273 xprt_clear_locked(xprt); 285 xprt_clear_locked(xprt);
274out_sleep: 286out_sleep:
275 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 287 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
276 task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
277 task->tk_status = -EAGAIN; 288 task->tk_status = -EAGAIN;
278 rpc_sleep_on(&xprt->sending, task, NULL); 289 if (RPC_IS_SOFT(task))
290 rpc_sleep_on_timeout(&xprt->sending, task, NULL,
291 xprt_request_timeout(req));
292 else
293 rpc_sleep_on(&xprt->sending, task, NULL);
279 return 0; 294 return 0;
280} 295}
281EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); 296EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -554,53 +569,44 @@ bool xprt_write_space(struct rpc_xprt *xprt)
554} 569}
555EXPORT_SYMBOL_GPL(xprt_write_space); 570EXPORT_SYMBOL_GPL(xprt_write_space);
556 571
557/** 572static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
558 * xprt_set_retrans_timeout_def - set a request's retransmit timeout
559 * @task: task whose timeout is to be set
560 *
561 * Set a request's retransmit timeout based on the transport's
562 * default timeout parameters. Used by transports that don't adjust
563 * the retransmit timeout based on round-trip time estimation.
564 */
565void xprt_set_retrans_timeout_def(struct rpc_task *task)
566{ 573{
567 task->tk_timeout = task->tk_rqstp->rq_timeout; 574 s64 delta = ktime_to_ns(ktime_get() - abstime);
575 return likely(delta >= 0) ?
576 jiffies - nsecs_to_jiffies(delta) :
577 jiffies + nsecs_to_jiffies(-delta);
568} 578}
569EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
570 579
571/** 580static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
572 * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
573 * @task: task whose timeout is to be set
574 *
575 * Set a request's retransmit timeout using the RTT estimator.
576 */
577void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
578{ 581{
579 int timer = task->tk_msg.rpc_proc->p_timer; 582 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
580 struct rpc_clnt *clnt = task->tk_client; 583 unsigned long majortimeo = req->rq_timeout;
581 struct rpc_rtt *rtt = clnt->cl_rtt;
582 struct rpc_rqst *req = task->tk_rqstp;
583 unsigned long max_timeout = clnt->cl_timeout->to_maxval;
584 584
585 task->tk_timeout = rpc_calc_rto(rtt, timer); 585 if (to->to_exponential)
586 task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; 586 majortimeo <<= to->to_retries;
587 if (task->tk_timeout > max_timeout || task->tk_timeout == 0) 587 else
588 task->tk_timeout = max_timeout; 588 majortimeo += to->to_increment * to->to_retries;
589 if (majortimeo > to->to_maxval || majortimeo == 0)
590 majortimeo = to->to_maxval;
591 return majortimeo;
589} 592}
590EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
591 593
592static void xprt_reset_majortimeo(struct rpc_rqst *req) 594static void xprt_reset_majortimeo(struct rpc_rqst *req)
593{ 595{
594 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; 596 req->rq_majortimeo += xprt_calc_majortimeo(req);
597}
595 598
596 req->rq_majortimeo = req->rq_timeout; 599static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
597 if (to->to_exponential) 600{
598 req->rq_majortimeo <<= to->to_retries; 601 unsigned long time_init;
602 struct rpc_xprt *xprt = req->rq_xprt;
603
604 if (likely(xprt && xprt_connected(xprt)))
605 time_init = jiffies;
599 else 606 else
600 req->rq_majortimeo += to->to_increment * to->to_retries; 607 time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
601 if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0) 608 req->rq_timeout = task->tk_client->cl_timeout->to_initval;
602 req->rq_majortimeo = to->to_maxval; 609 req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
603 req->rq_majortimeo += jiffies;
604} 610}
605 611
606/** 612/**
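Note: xprt_calc_majortimeo() computes the request's total retransmit budget as a relative interval, and xprt_init_majortimeo() anchors it either at "now" (transport already connected) or at the task's start time converted from ktime to jiffies, so time spent queued waiting for a connection no longer extends the deadline. Worked numbers under hypothetical timeout parameters:

        /* Hypothetical: to_initval = 5 s, to_retries = 3, to_maxval = 30 s.
         * Exponential backoff:          5 s << 3    = 40 s -> clamped to 30 s.
         * Linear with to_increment = 5: 5 s + 3*5 s = 20 s (under the cap). */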
@@ -822,9 +828,9 @@ void xprt_connect(struct rpc_task *task)
822 xprt->ops->close(xprt); 828 xprt->ops->close(xprt);
823 829
824 if (!xprt_connected(xprt)) { 830 if (!xprt_connected(xprt)) {
825 task->tk_timeout = task->tk_rqstp->rq_timeout;
826 task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; 831 task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
827 rpc_sleep_on(&xprt->pending, task, NULL); 832 rpc_sleep_on_timeout(&xprt->pending, task, NULL,
833 xprt_request_timeout(task->tk_rqstp));
828 834
829 if (test_bit(XPRT_CLOSING, &xprt->state)) 835 if (test_bit(XPRT_CLOSING, &xprt->state))
830 return; 836 return;
@@ -949,7 +955,7 @@ xprt_is_pinned_rqst(struct rpc_rqst *req)
949 * @req: Request to pin 955 * @req: Request to pin
950 * 956 *
951 * Caller must ensure this is atomic with the call to xprt_lookup_rqst() 957 * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
952 * so should be holding the xprt receive lock. 958 * so should be holding xprt->queue_lock.
953 */ 959 */
954void xprt_pin_rqst(struct rpc_rqst *req) 960void xprt_pin_rqst(struct rpc_rqst *req)
955{ 961{
@@ -961,7 +967,7 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
961 * xprt_unpin_rqst - Unpin a request on the transport receive list 967 * xprt_unpin_rqst - Unpin a request on the transport receive list
 962 * @req: Request to unpin 968 * @req: Request to unpin
963 * 969 *
964 * Caller should be holding the xprt receive lock. 970 * Caller should be holding xprt->queue_lock.
965 */ 971 */
966void xprt_unpin_rqst(struct rpc_rqst *req) 972void xprt_unpin_rqst(struct rpc_rqst *req)
967{ 973{
@@ -1017,7 +1023,6 @@ xprt_request_enqueue_receive(struct rpc_task *task)
1017 set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); 1023 set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
1018 spin_unlock(&xprt->queue_lock); 1024 spin_unlock(&xprt->queue_lock);
1019 1025
1020 xprt_reset_majortimeo(req);
1021 /* Turn off autodisconnect */ 1026 /* Turn off autodisconnect */
1022 del_singleshot_timer_sync(&xprt->timer); 1027 del_singleshot_timer_sync(&xprt->timer);
1023} 1028}
@@ -1103,6 +1108,49 @@ static void xprt_timer(struct rpc_task *task)
1103} 1108}
1104 1109
1105/** 1110/**
1111 * xprt_wait_for_reply_request_def - wait for reply
1112 * @task: pointer to rpc_task
1113 *
1114 * Set a request's retransmit timeout based on the transport's
1115 * default timeout parameters. Used by transports that don't adjust
1116 * the retransmit timeout based on round-trip time estimation,
1117 * and put the task to sleep on the pending queue.
1118 */
1119void xprt_wait_for_reply_request_def(struct rpc_task *task)
1120{
1121 struct rpc_rqst *req = task->tk_rqstp;
1122
1123 rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
1124 xprt_request_timeout(req));
1125}
1126EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def);
1127
1128/**
1129 * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator
1130 * @task: pointer to rpc_task
1131 *
1132 * Set a request's retransmit timeout using the RTT estimator,
1133 * and put the task to sleep on the pending queue.
1134 */
1135void xprt_wait_for_reply_request_rtt(struct rpc_task *task)
1136{
1137 int timer = task->tk_msg.rpc_proc->p_timer;
1138 struct rpc_clnt *clnt = task->tk_client;
1139 struct rpc_rtt *rtt = clnt->cl_rtt;
1140 struct rpc_rqst *req = task->tk_rqstp;
1141 unsigned long max_timeout = clnt->cl_timeout->to_maxval;
1142 unsigned long timeout;
1143
1144 timeout = rpc_calc_rto(rtt, timer);
1145 timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
1146 if (timeout > max_timeout || timeout == 0)
1147 timeout = max_timeout;
1148 rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
1149 jiffies + timeout);
1150}
1151EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt);
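Note: the RTT path keeps the old estimator arithmetic, merely relocated: a base timeout from rpc_calc_rto(), doubled once per recorded backoff step and per retransmission of this request, clamped to to_maxval, then handed to rpc_sleep_on_timeout() as an absolute deadline. Illustrative numbers:

        /* Hypothetical: rpc_calc_rto() = 200 ms, rpc_ntimeo() = 1,
         * rq_retries = 1:
         *   timeout = 200 ms << (1 + 1) = 800 ms,
         * clamped to to_maxval if larger (or if the shift overflowed
         * to zero). */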
1152
1153/**
1106 * xprt_request_wait_receive - wait for the reply to an RPC request 1154 * xprt_request_wait_receive - wait for the reply to an RPC request
1107 * @task: RPC task about to send a request 1155 * @task: RPC task about to send a request
1108 * 1156 *
@@ -1121,8 +1169,7 @@ void xprt_request_wait_receive(struct rpc_task *task)
1121 */ 1169 */
1122 spin_lock(&xprt->queue_lock); 1170 spin_lock(&xprt->queue_lock);
1123 if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { 1171 if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
1124 xprt->ops->set_retrans_timeout(task); 1172 xprt->ops->wait_for_reply_request(task);
1125 rpc_sleep_on(&xprt->pending, task, xprt_timer);
1126 /* 1173 /*
1127 * Send an extra queue wakeup call if the 1174 * Send an extra queue wakeup call if the
1128 * connection was dropped in case the call to 1175 * connection was dropped in case the call to
@@ -1337,6 +1384,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
1337 if (status < 0) 1384 if (status < 0)
1338 goto out_dequeue; 1385 goto out_dequeue;
1339 } 1386 }
1387 if (RPC_SIGNALLED(task)) {
1388 status = -ERESTARTSYS;
1389 goto out_dequeue;
1390 }
1340 } 1391 }
1341 1392
1342 /* 1393 /*
@@ -1605,7 +1656,6 @@ xprt_request_init(struct rpc_task *task)
1605 struct rpc_xprt *xprt = task->tk_xprt; 1656 struct rpc_xprt *xprt = task->tk_xprt;
1606 struct rpc_rqst *req = task->tk_rqstp; 1657 struct rpc_rqst *req = task->tk_rqstp;
1607 1658
1608 req->rq_timeout = task->tk_client->cl_timeout->to_initval;
1609 req->rq_task = task; 1659 req->rq_task = task;
1610 req->rq_xprt = xprt; 1660 req->rq_xprt = xprt;
1611 req->rq_buffer = NULL; 1661 req->rq_buffer = NULL;
@@ -1618,7 +1668,7 @@ xprt_request_init(struct rpc_task *task)
1618 req->rq_snd_buf.bvec = NULL; 1668 req->rq_snd_buf.bvec = NULL;
1619 req->rq_rcv_buf.bvec = NULL; 1669 req->rq_rcv_buf.bvec = NULL;
1620 req->rq_release_snd_buf = NULL; 1670 req->rq_release_snd_buf = NULL;
1621 xprt_reset_majortimeo(req); 1671 xprt_init_majortimeo(task, req);
1622 dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, 1672 dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
1623 req, ntohl(req->rq_xid)); 1673 req, ntohl(req->rq_xid));
1624} 1674}
@@ -1647,7 +1697,6 @@ void xprt_reserve(struct rpc_task *task)
1647 if (task->tk_rqstp != NULL) 1697 if (task->tk_rqstp != NULL)
1648 return; 1698 return;
1649 1699
1650 task->tk_timeout = 0;
1651 task->tk_status = -EAGAIN; 1700 task->tk_status = -EAGAIN;
1652 if (!xprt_throttle_congested(xprt, task)) 1701 if (!xprt_throttle_congested(xprt, task))
1653 xprt_do_reserve(xprt, task); 1702 xprt_do_reserve(xprt, task);
@@ -1670,7 +1719,6 @@ void xprt_retry_reserve(struct rpc_task *task)
1670 if (task->tk_rqstp != NULL) 1719 if (task->tk_rqstp != NULL)
1671 return; 1720 return;
1672 1721
1673 task->tk_timeout = 0;
1674 task->tk_status = -EAGAIN; 1722 task->tk_status = -EAGAIN;
1675 xprt_do_reserve(xprt, task); 1723 xprt_do_reserve(xprt, task);
1676} 1724}
@@ -1827,7 +1875,9 @@ found:
1827 xprt->idle_timeout = 0; 1875 xprt->idle_timeout = 0;
1828 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1876 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1829 if (xprt_has_timer(xprt)) 1877 if (xprt_has_timer(xprt))
1830 timer_setup(&xprt->timer, xprt_init_autodisconnect, 0); 1878 timer_setup(&xprt->timer,
1879 xprt_init_autodisconnect,
1880 TIMER_DEFERRABLE);
1831 else 1881 else
1832 timer_setup(&xprt->timer, NULL, 0); 1882 timer_setup(&xprt->timer, NULL, 0);
1833 1883
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index d79b18c1f4cd..ce986591f213 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -19,45 +19,6 @@
19 19
20#undef RPCRDMA_BACKCHANNEL_DEBUG 20#undef RPCRDMA_BACKCHANNEL_DEBUG
21 21
22static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
23 unsigned int count)
24{
25 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
26 struct rpcrdma_req *req;
27 struct rpc_rqst *rqst;
28 unsigned int i;
29
30 for (i = 0; i < (count << 1); i++) {
31 struct rpcrdma_regbuf *rb;
32 size_t size;
33
34 req = rpcrdma_create_req(r_xprt);
35 if (IS_ERR(req))
36 return PTR_ERR(req);
37 rqst = &req->rl_slot;
38
39 rqst->rq_xprt = xprt;
40 INIT_LIST_HEAD(&rqst->rq_bc_list);
41 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
42 spin_lock(&xprt->bc_pa_lock);
43 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
44 spin_unlock(&xprt->bc_pa_lock);
45
46 size = r_xprt->rx_data.inline_rsize;
47 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
48 if (IS_ERR(rb))
49 goto out_fail;
50 req->rl_sendbuf = rb;
51 xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
52 min_t(size_t, size, PAGE_SIZE));
53 }
54 return 0;
55
56out_fail:
57 rpcrdma_req_destroy(req);
58 return -ENOMEM;
59}
60
61/** 22/**
62 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests 23 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
63 * @xprt: transport associated with these backchannel resources 24 * @xprt: transport associated with these backchannel resources
@@ -68,34 +29,10 @@ out_fail:
68int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) 29int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
69{ 30{
70 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 31 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
71 int rc;
72 32
73 /* The backchannel reply path returns each rpc_rqst to the 33 r_xprt->rx_buf.rb_bc_srv_max_requests = RPCRDMA_BACKWARD_WRS >> 1;
74 * bc_pa_list _after_ the reply is sent. If the server is
75 * faster than the client, it can send another backward
76 * direction request before the rpc_rqst is returned to the
77 * list. The client rejects the request in this case.
78 *
79 * Twice as many rpc_rqsts are prepared to ensure there is
80 * always an rpc_rqst available as soon as a reply is sent.
81 */
82 if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
83 goto out_err;
84
85 rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
86 if (rc)
87 goto out_free;
88
89 r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
90 trace_xprtrdma_cb_setup(r_xprt, reqs); 34 trace_xprtrdma_cb_setup(r_xprt, reqs);
91 return 0; 35 return 0;
92
93out_free:
94 xprt_rdma_bc_destroy(xprt, reqs);
95
96out_err:
97 pr_err("RPC: %s: setup backchannel transport failed\n", __func__);
98 return -ENOMEM;
99} 36}
100 37
101/** 38/**
@@ -107,10 +44,10 @@ out_err:
107size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) 44size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
108{ 45{
109 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 46 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
110 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 47 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
111 size_t maxmsg; 48 size_t maxmsg;
112 49
113 maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); 50 maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
114 maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE); 51 maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
115 return maxmsg - RPCRDMA_HDRLEN_MIN; 52 return maxmsg - RPCRDMA_HDRLEN_MIN;
116} 53}
@@ -123,7 +60,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
123 60
124 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 61 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
125 xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf, 62 xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
126 req->rl_rdmabuf->rg_base, rqst); 63 rdmab_data(req->rl_rdmabuf), rqst);
127 64
128 p = xdr_reserve_space(&req->rl_stream, 28); 65 p = xdr_reserve_space(&req->rl_stream, 28);
129 if (unlikely(!p)) 66 if (unlikely(!p))
@@ -223,6 +160,43 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
223 spin_unlock(&xprt->bc_pa_lock); 160 spin_unlock(&xprt->bc_pa_lock);
224} 161}
225 162
163static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
164{
165 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
166 struct rpcrdma_req *req;
167 struct rpc_rqst *rqst;
168 size_t size;
169
170 spin_lock(&xprt->bc_pa_lock);
171 rqst = list_first_entry_or_null(&xprt->bc_pa_list, struct rpc_rqst,
172 rq_bc_pa_list);
173 if (!rqst)
174 goto create_req;
175 list_del(&rqst->rq_bc_pa_list);
176 spin_unlock(&xprt->bc_pa_lock);
177 return rqst;
178
179create_req:
180 spin_unlock(&xprt->bc_pa_lock);
181
182 /* Set a limit to prevent a remote from overrunning our resources.
183 */
184 if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
185 return NULL;
186
187 size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
188 req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
189 if (!req)
190 return NULL;
191
192 xprt->bc_alloc_count++;
193 rqst = &req->rl_slot;
194 rqst->rq_xprt = xprt;
195 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
196 xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), size);
197 return rqst;
198}
199
226/** 200/**
227 * rpcrdma_bc_receive_call - Handle a backward direction call 201 * rpcrdma_bc_receive_call - Handle a backward direction call
228 * @r_xprt: transport receiving the call 202 * @r_xprt: transport receiving the call
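Note: backchannel rpc_rqsts are no longer preallocated at setup time. rpcrdma_bc_rqst_get() first reuses a free rqst from bc_pa_list and otherwise creates one on demand, refusing once bc_alloc_count reaches RPCRDMA_BACKWARD_WRS so a server flooding backward-direction calls cannot pin unbounded client memory. The same capped-pool pattern in miniature (illustrative, not kernel code):

        item = pool_take(&pool);                /* fast path: reuse */
        if (!item && pool.allocated < POOL_MAX) {
                item = alloc_item();            /* slow path: grow */
                if (item)
                        pool.allocated++;
        }
        /* item == NULL: drop the call (the out_overflow path below) */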
@@ -254,18 +228,10 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
254 pr_info("RPC: %s: %*ph\n", __func__, size, p); 228 pr_info("RPC: %s: %*ph\n", __func__, size, p);
255#endif 229#endif
256 230
257 /* Grab a free bc rqst */ 231 rqst = rpcrdma_bc_rqst_get(r_xprt);
258 spin_lock(&xprt->bc_pa_lock); 232 if (!rqst)
259 if (list_empty(&xprt->bc_pa_list)) {
260 spin_unlock(&xprt->bc_pa_lock);
261 goto out_overflow; 233 goto out_overflow;
262 }
263 rqst = list_first_entry(&xprt->bc_pa_list,
264 struct rpc_rqst, rq_bc_pa_list);
265 list_del(&rqst->rq_bc_pa_list);
266 spin_unlock(&xprt->bc_pa_lock);
267 234
268 /* Prepare rqst */
269 rqst->rq_reply_bytes_recvd = 0; 235 rqst->rq_reply_bytes_recvd = 0;
270 rqst->rq_xid = *p; 236 rqst->rq_xid = *p;
271 237
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 52cb6c1b0c2b..794ba4ca0994 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -82,13 +82,13 @@
82 82
83/** 83/**
84 * frwr_is_supported - Check if device supports FRWR 84 * frwr_is_supported - Check if device supports FRWR
85 * @ia: interface adapter to check 85 * @device: interface adapter to check
86 * 86 *
87 * Returns true if device supports FRWR, otherwise false 87 * Returns true if device supports FRWR, otherwise false
88 */ 88 */
89bool frwr_is_supported(struct rpcrdma_ia *ia) 89bool frwr_is_supported(struct ib_device *device)
90{ 90{
91 struct ib_device_attr *attrs = &ia->ri_device->attrs; 91 struct ib_device_attr *attrs = &device->attrs;
92 92
93 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 93 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
94 goto out_not_supported; 94 goto out_not_supported;
@@ -98,7 +98,7 @@ bool frwr_is_supported(struct rpcrdma_ia *ia)
98 98
99out_not_supported: 99out_not_supported:
100 pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n", 100 pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
101 ia->ri_device->name); 101 device->name);
102 return false; 102 return false;
103} 103}
104 104
@@ -131,7 +131,7 @@ frwr_mr_recycle_worker(struct work_struct *work)
131 131
132 if (mr->mr_dir != DMA_NONE) { 132 if (mr->mr_dir != DMA_NONE) {
133 trace_xprtrdma_mr_unmap(mr); 133 trace_xprtrdma_mr_unmap(mr);
134 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 134 ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
135 mr->mr_sg, mr->mr_nents, mr->mr_dir); 135 mr->mr_sg, mr->mr_nents, mr->mr_dir);
136 mr->mr_dir = DMA_NONE; 136 mr->mr_dir = DMA_NONE;
137 } 137 }
@@ -194,12 +194,11 @@ out_list_err:
194 * frwr_open - Prepare an endpoint for use with FRWR 194 * frwr_open - Prepare an endpoint for use with FRWR
195 * @ia: interface adapter this endpoint will use 195 * @ia: interface adapter this endpoint will use
196 * @ep: endpoint to prepare 196 * @ep: endpoint to prepare
197 * @cdata: transport parameters
198 * 197 *
199 * On success, sets: 198 * On success, sets:
200 * ep->rep_attr.cap.max_send_wr 199 * ep->rep_attr.cap.max_send_wr
201 * ep->rep_attr.cap.max_recv_wr 200 * ep->rep_attr.cap.max_recv_wr
202 * cdata->max_requests 201 * ep->rep_max_requests
203 * ia->ri_max_segs 202 * ia->ri_max_segs
204 * 203 *
205 * And these FRWR-related fields: 204 * And these FRWR-related fields:
@@ -208,10 +207,9 @@ out_list_err:
208 * 207 *
209 * On failure, a negative errno is returned. 208 * On failure, a negative errno is returned.
210 */ 209 */
211int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 210int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
212 struct rpcrdma_create_data_internal *cdata)
213{ 211{
214 struct ib_device_attr *attrs = &ia->ri_device->attrs; 212 struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
215 int max_qp_wr, depth, delta; 213 int max_qp_wr, depth, delta;
216 214
217 ia->ri_mrtype = IB_MR_TYPE_MEM_REG; 215 ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
@@ -253,24 +251,23 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
253 } while (delta > 0); 251 } while (delta > 0);
254 } 252 }
255 253
256 max_qp_wr = ia->ri_device->attrs.max_qp_wr; 254 max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
257 max_qp_wr -= RPCRDMA_BACKWARD_WRS; 255 max_qp_wr -= RPCRDMA_BACKWARD_WRS;
258 max_qp_wr -= 1; 256 max_qp_wr -= 1;
259 if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) 257 if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
260 return -ENOMEM; 258 return -ENOMEM;
261 if (cdata->max_requests > max_qp_wr) 259 if (ep->rep_max_requests > max_qp_wr)
262 cdata->max_requests = max_qp_wr; 260 ep->rep_max_requests = max_qp_wr;
263 ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; 261 ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
264 if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { 262 if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
265 cdata->max_requests = max_qp_wr / depth; 263 ep->rep_max_requests = max_qp_wr / depth;
266 if (!cdata->max_requests) 264 if (!ep->rep_max_requests)
267 return -EINVAL; 265 return -EINVAL;
268 ep->rep_attr.cap.max_send_wr = cdata->max_requests * 266 ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
269 depth;
270 } 267 }
271 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; 268 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
272 ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ 269 ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
273 ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 270 ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
274 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 271 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
275 ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ 272 ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
276 273
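Note: the work-request accounting is unchanged; only the credit limit moves from the cdata struct into ep->rep_max_requests. Each RPC consumes `depth` send WRs (the Send itself plus Fast-Register and invalidation WRs), so the limit is solved backwards from the device's max_qp_wr. Worked example with hypothetical values (max_qp_wr = 4096, depth = 3, RPCRDMA_BACKWARD_WRS = 8):

        /* usable      = 4096 - 8 - 1 = 4087
         * first clamp: rep_max_requests <= 4087
         * 4087 * 3 > 4087, so rep_max_requests = 4087 / 3 = 1362 and
         * max_send_wr = 1362 * 3 + 8 + 1 (ib_drain_sq) = 4095. */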
@@ -300,15 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
300 (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth); 297 (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
301} 298}
302 299
303static void
304__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
305{
306 if (wc->status != IB_WC_WR_FLUSH_ERR)
307 pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
308 wr, ib_wc_status_msg(wc->status),
309 wc->status, wc->vendor_err);
310}
311
312/** 300/**
313 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC 301 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
314 * @cq: completion queue (ignored) 302 * @cq: completion queue (ignored)
@@ -323,10 +311,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
323 container_of(cqe, struct rpcrdma_frwr, fr_cqe); 311 container_of(cqe, struct rpcrdma_frwr, fr_cqe);
324 312
325 /* WARNING: Only wr_cqe and status are reliable at this point */ 313 /* WARNING: Only wr_cqe and status are reliable at this point */
326 if (wc->status != IB_WC_SUCCESS) { 314 if (wc->status != IB_WC_SUCCESS)
327 frwr->fr_state = FRWR_FLUSHED_FR; 315 frwr->fr_state = FRWR_FLUSHED_FR;
328 __frwr_sendcompletion_flush(wc, "fastreg");
329 }
330 trace_xprtrdma_wc_fastreg(wc, frwr); 316 trace_xprtrdma_wc_fastreg(wc, frwr);
331} 317}
332 318
@@ -344,10 +330,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
344 fr_cqe); 330 fr_cqe);
345 331
346 /* WARNING: Only wr_cqe and status are reliable at this point */ 332 /* WARNING: Only wr_cqe and status are reliable at this point */
347 if (wc->status != IB_WC_SUCCESS) { 333 if (wc->status != IB_WC_SUCCESS)
348 frwr->fr_state = FRWR_FLUSHED_LI; 334 frwr->fr_state = FRWR_FLUSHED_LI;
349 __frwr_sendcompletion_flush(wc, "localinv");
350 }
351 trace_xprtrdma_wc_li(wc, frwr); 335 trace_xprtrdma_wc_li(wc, frwr);
352} 336}
353 337
@@ -366,12 +350,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
366 fr_cqe); 350 fr_cqe);
367 351
368 /* WARNING: Only wr_cqe and status are reliable at this point */ 352 /* WARNING: Only wr_cqe and status are reliable at this point */
369 if (wc->status != IB_WC_SUCCESS) { 353 if (wc->status != IB_WC_SUCCESS)
370 frwr->fr_state = FRWR_FLUSHED_LI; 354 frwr->fr_state = FRWR_FLUSHED_LI;
371 __frwr_sendcompletion_flush(wc, "localinv");
372 }
373 complete(&frwr->fr_linv_done);
374 trace_xprtrdma_wc_li_wake(wc, frwr); 355 trace_xprtrdma_wc_li_wake(wc, frwr);
356 complete(&frwr->fr_linv_done);
375} 357}
376 358
377/** 359/**
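Note: besides dropping __frwr_sendcompletion_flush(), the hunk above reorders frwr_wc_localinv_wake() so the tracepoint fires before complete(). A plausible reading: completing fr_linv_done can immediately unblock the thread waiting out local invalidation, which may then recycle or free the MR containing the frwr, so the handler must not touch it afterwards:

        /* Touch @frwr only before complete(): the waiter it wakes may
         * immediately reuse or free the containing MR. */
        trace_xprtrdma_wc_li_wake(wc, frwr);
        complete(&frwr->fr_linv_done);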
@@ -436,7 +418,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
436 } 418 }
437 mr->mr_dir = rpcrdma_data_dir(writing); 419 mr->mr_dir = rpcrdma_data_dir(writing);
438 420
439 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); 421 mr->mr_nents =
422 ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
440 if (!mr->mr_nents) 423 if (!mr->mr_nents)
441 goto out_dmamap_err; 424 goto out_dmamap_err;
442 425
@@ -466,7 +449,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
466 return seg; 449 return seg;
467 450
468out_dmamap_err: 451out_dmamap_err:
469 frwr->fr_state = FRWR_IS_INVALID; 452 mr->mr_dir = DMA_NONE;
470 trace_xprtrdma_frwr_sgerr(mr, i); 453 trace_xprtrdma_frwr_sgerr(mr, i);
471 rpcrdma_mr_put(mr); 454 rpcrdma_mr_put(mr);
472 return ERR_PTR(-EIO); 455 return ERR_PTR(-EIO);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 6c1fb270f127..85115a2e2639 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -105,16 +105,23 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
105 return size; 105 return size;
106} 106}
107 107
108/**
109 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
110 * @r_xprt: transport instance to initialize
111 *
112 * The max_inline fields contain the maximum size of an RPC message
113 * so the marshaling code doesn't have to repeat this calculation
114 * for every RPC.
115 */
108void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) 116void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
109{ 117{
110 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 118 unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
111 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 119 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
112 unsigned int maxsegs = ia->ri_max_segs; 120
113 121 ep->rep_max_inline_send =
114 ia->ri_max_inline_write = cdata->inline_wsize - 122 ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
115 rpcrdma_max_call_header_size(maxsegs); 123 ep->rep_max_inline_recv =
116 ia->ri_max_inline_read = cdata->inline_rsize - 124 ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
117 rpcrdma_max_reply_header_size(maxsegs);
118} 125}
119 126
120/* The client can send a request inline as long as the RPCRDMA header 127/* The client can send a request inline as long as the RPCRDMA header
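Note: the renamed fields make the direction explicit. rep_max_inline_send and rep_max_inline_recv cache the largest RPC payloads that still fit inline, i.e. the negotiated inline threshold minus the worst-case RPC-over-RDMA header for ri_max_segs chunk segments, so rpcrdma_args_inline() and rpcrdma_results_inline() below reduce to single comparisons. Illustrative arithmetic:

        /* Hypothetical: rep_inline_send = 4096 bytes and a worst-case
         * call header of 132 bytes for this maxsegs:
         *   rep_max_inline_send = 4096 - 132 = 3964,
         * so a 3 KB WRITE payload goes inline; an 8 KB one uses chunks. */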
@@ -131,7 +138,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
131 struct xdr_buf *xdr = &rqst->rq_snd_buf; 138 struct xdr_buf *xdr = &rqst->rq_snd_buf;
132 unsigned int count, remaining, offset; 139 unsigned int count, remaining, offset;
133 140
134 if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) 141 if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
135 return false; 142 return false;
136 143
137 if (xdr->page_len) { 144 if (xdr->page_len) {
@@ -159,9 +166,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
159static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, 166static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
160 struct rpc_rqst *rqst) 167 struct rpc_rqst *rqst)
161{ 168{
162 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 169 return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
163
164 return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
165} 170}
166 171
167/* The client is required to provide a Reply chunk if the maximum 172/* The client is required to provide a Reply chunk if the maximum
@@ -173,10 +178,9 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
173 const struct rpc_rqst *rqst) 178 const struct rpc_rqst *rqst)
174{ 179{
175 const struct xdr_buf *buf = &rqst->rq_rcv_buf; 180 const struct xdr_buf *buf = &rqst->rq_rcv_buf;
176 const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
177 181
178 return buf->head[0].iov_len + buf->tail[0].iov_len < 182 return (buf->head[0].iov_len + buf->tail[0].iov_len) <
179 ia->ri_max_inline_read; 183 r_xprt->rx_ep.rep_max_inline_recv;
180} 184}
181 185
182/* Split @vec on page boundaries into SGEs. FMR registers pages, not 186/* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -238,7 +242,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
238 */ 242 */
239 if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) { 243 if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
240 if (!*ppages) 244 if (!*ppages)
241 *ppages = alloc_page(GFP_ATOMIC); 245 *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
242 if (!*ppages) 246 if (!*ppages)
243 return -ENOBUFS; 247 return -ENOBUFS;
244 } 248 }
@@ -508,50 +512,45 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
508} 512}
509 513
510/** 514/**
511 * rpcrdma_unmap_sendctx - DMA-unmap Send buffers 515 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
512 * @sc: sendctx containing SGEs to unmap 516 * @sc: sendctx containing SGEs to unmap
513 * 517 *
514 */ 518 */
515void 519void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
516rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
517{ 520{
518 struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
519 struct ib_sge *sge; 521 struct ib_sge *sge;
520 unsigned int count;
521 522
522 /* The first two SGEs contain the transport header and 523 /* The first two SGEs contain the transport header and
523 * the inline buffer. These are always left mapped so 524 * the inline buffer. These are always left mapped so
524 * they can be cheaply re-used. 525 * they can be cheaply re-used.
525 */ 526 */
526 sge = &sc->sc_sges[2]; 527 for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
527 for (count = sc->sc_unmap_count; count; ++sge, --count) 528 ++sge, --sc->sc_unmap_count)
528 ib_dma_unmap_page(ia->ri_device, 529 ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
529 sge->addr, sge->length, DMA_TO_DEVICE); 530 DMA_TO_DEVICE);
530 531
531 if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) { 532 if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
532 smp_mb__after_atomic(); 533 &sc->sc_req->rl_flags))
533 wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES); 534 wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
534 }
535} 535}
536 536
537/* Prepare an SGE for the RPC-over-RDMA transport header. 537/* Prepare an SGE for the RPC-over-RDMA transport header.
538 */ 538 */
539static bool 539static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
540rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 540 struct rpcrdma_req *req, u32 len)
541 u32 len)
542{ 541{
543 struct rpcrdma_sendctx *sc = req->rl_sendctx; 542 struct rpcrdma_sendctx *sc = req->rl_sendctx;
544 struct rpcrdma_regbuf *rb = req->rl_rdmabuf; 543 struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
545 struct ib_sge *sge = sc->sc_sges; 544 struct ib_sge *sge = sc->sc_sges;
546 545
547 if (!rpcrdma_dma_map_regbuf(ia, rb)) 546 if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
548 goto out_regbuf; 547 goto out_regbuf;
549 sge->addr = rdmab_addr(rb); 548 sge->addr = rdmab_addr(rb);
550 sge->length = len; 549 sge->length = len;
551 sge->lkey = rdmab_lkey(rb); 550 sge->lkey = rdmab_lkey(rb);
552 551
553 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, 552 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
554 sge->length, DMA_TO_DEVICE); 553 DMA_TO_DEVICE);
555 sc->sc_wr.num_sge++; 554 sc->sc_wr.num_sge++;
556 return true; 555 return true;
557 556
@@ -563,23 +562,23 @@ out_regbuf:
563/* Prepare the Send SGEs. The head and tail iovec, and each entry 562/* Prepare the Send SGEs. The head and tail iovec, and each entry
564 * in the page list, gets its own SGE. 563 * in the page list, gets its own SGE.
565 */ 564 */
566static bool 565static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
567rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 566 struct rpcrdma_req *req,
568 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 567 struct xdr_buf *xdr,
568 enum rpcrdma_chunktype rtype)
569{ 569{
570 struct rpcrdma_sendctx *sc = req->rl_sendctx; 570 struct rpcrdma_sendctx *sc = req->rl_sendctx;
571 unsigned int sge_no, page_base, len, remaining; 571 unsigned int sge_no, page_base, len, remaining;
572 struct rpcrdma_regbuf *rb = req->rl_sendbuf; 572 struct rpcrdma_regbuf *rb = req->rl_sendbuf;
573 struct ib_device *device = ia->ri_device;
574 struct ib_sge *sge = sc->sc_sges; 573 struct ib_sge *sge = sc->sc_sges;
575 u32 lkey = ia->ri_pd->local_dma_lkey;
576 struct page *page, **ppages; 574 struct page *page, **ppages;
577 575
578 /* The head iovec is straightforward, as it is already 576 /* The head iovec is straightforward, as it is already
579 * DMA-mapped. Sync the content that has changed. 577 * DMA-mapped. Sync the content that has changed.
580 */ 578 */
581 if (!rpcrdma_dma_map_regbuf(ia, rb)) 579 if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
582 goto out_regbuf; 580 goto out_regbuf;
581 sc->sc_device = rdmab_device(rb);
583 sge_no = 1; 582 sge_no = 1;
584 sge[sge_no].addr = rdmab_addr(rb); 583 sge[sge_no].addr = rdmab_addr(rb);
585 sge[sge_no].length = xdr->head[0].iov_len; 584 sge[sge_no].length = xdr->head[0].iov_len;
@@ -626,13 +625,14 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
626 goto out_mapping_overflow; 625 goto out_mapping_overflow;
627 626
628 len = min_t(u32, PAGE_SIZE - page_base, remaining); 627 len = min_t(u32, PAGE_SIZE - page_base, remaining);
629 sge[sge_no].addr = ib_dma_map_page(device, *ppages, 628 sge[sge_no].addr =
630 page_base, len, 629 ib_dma_map_page(rdmab_device(rb), *ppages,
631 DMA_TO_DEVICE); 630 page_base, len, DMA_TO_DEVICE);
632 if (ib_dma_mapping_error(device, sge[sge_no].addr)) 631 if (ib_dma_mapping_error(rdmab_device(rb),
632 sge[sge_no].addr))
633 goto out_mapping_err; 633 goto out_mapping_err;
634 sge[sge_no].length = len; 634 sge[sge_no].length = len;
635 sge[sge_no].lkey = lkey; 635 sge[sge_no].lkey = rdmab_lkey(rb);
636 636
637 sc->sc_unmap_count++; 637 sc->sc_unmap_count++;
638 ppages++; 638 ppages++;
@@ -653,13 +653,13 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
653 653
654map_tail: 654map_tail:
655 sge_no++; 655 sge_no++;
656 sge[sge_no].addr = ib_dma_map_page(device, page, 656 sge[sge_no].addr =
657 page_base, len, 657 ib_dma_map_page(rdmab_device(rb), page, page_base, len,
658 DMA_TO_DEVICE); 658 DMA_TO_DEVICE);
659 if (ib_dma_mapping_error(device, sge[sge_no].addr)) 659 if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
660 goto out_mapping_err; 660 goto out_mapping_err;
661 sge[sge_no].length = len; 661 sge[sge_no].length = len;
662 sge[sge_no].lkey = lkey; 662 sge[sge_no].lkey = rdmab_lkey(rb);
663 sc->sc_unmap_count++; 663 sc->sc_unmap_count++;
664 } 664 }
665 665
@@ -674,12 +674,12 @@ out_regbuf:
674 return false; 674 return false;
675 675
676out_mapping_overflow: 676out_mapping_overflow:
677 rpcrdma_unmap_sendctx(sc); 677 rpcrdma_sendctx_unmap(sc);
678 pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); 678 pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
679 return false; 679 return false;
680 680
681out_mapping_err: 681out_mapping_err:
682 rpcrdma_unmap_sendctx(sc); 682 rpcrdma_sendctx_unmap(sc);
683 trace_xprtrdma_dma_maperr(sge[sge_no].addr); 683 trace_xprtrdma_dma_maperr(sge[sge_no].addr);
684 return false; 684 return false;
685} 685}
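[Annotation] The page-list walk in rpcrdma_prepare_msg_sges() gives every page fragment its own SGE, and each iteration's length is capped at the next page boundary by min_t(). A standalone sketch of just that arithmetic (invented names, no RDMA calls), runnable in userspace:

	#include <stdio.h>

	#define PAGE_SIZE 4096u

	/* Walk a byte range that begins @page_base bytes into its first
	 * page, emitting one fragment per iteration.  Each fragment's
	 * length is capped at the next page boundary, exactly the
	 * min_t(u32, PAGE_SIZE - page_base, remaining) step above.
	 */
	static unsigned int count_fragments(unsigned int page_base,
					    unsigned int remaining)
	{
		unsigned int nsegs = 0;

		while (remaining) {
			unsigned int len = PAGE_SIZE - page_base;

			if (len > remaining)
				len = remaining;
			printf("SGE %u: page offset %u, length %u\n",
			       nsegs, page_base, len);
			remaining -= len;
			page_base = 0;	/* later pages start at offset 0 */
			nsegs++;
		}
		return nsegs;
	}

	int main(void)
	{
		/* e.g. 10000 bytes starting 100 bytes into the first page */
		printf("total SGEs: %u\n", count_fragments(100, 10000));
		return 0;
	}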
@@ -699,7 +699,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
699 struct rpcrdma_req *req, u32 hdrlen, 699 struct rpcrdma_req *req, u32 hdrlen,
700 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 700 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
701{ 701{
702 req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); 702 req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
703 if (!req->rl_sendctx) 703 if (!req->rl_sendctx)
704 return -EAGAIN; 704 return -EAGAIN;
705 req->rl_sendctx->sc_wr.num_sge = 0; 705 req->rl_sendctx->sc_wr.num_sge = 0;
@@ -707,11 +707,11 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
707 req->rl_sendctx->sc_req = req; 707 req->rl_sendctx->sc_req = req;
708 __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); 708 __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
709 709
710 if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen)) 710 if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
711 return -EIO; 711 return -EIO;
712 712
713 if (rtype != rpcrdma_areadch) 713 if (rtype != rpcrdma_areadch)
714 if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype)) 714 if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
715 return -EIO; 715 return -EIO;
716 716
717 return 0; 717 return 0;
@@ -747,8 +747,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
747 int ret; 747 int ret;
748 748
749 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 749 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
750 xdr_init_encode(xdr, &req->rl_hdrbuf, 750 xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
751 req->rl_rdmabuf->rg_base, rqst); 751 rqst);
752 752
753 /* Fixed header fields */ 753 /* Fixed header fields */
754 ret = -EMSGSIZE; 754 ret = -EMSGSIZE;
@@ -876,6 +876,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
876 return 0; 876 return 0;
877 877
878out_err: 878out_err:
879 trace_xprtrdma_marshal_failed(rqst, ret);
879 switch (ret) { 880 switch (ret) {
880 case -EAGAIN: 881 case -EAGAIN:
881 xprt_wait_for_buffer_space(rqst->rq_xprt); 882 xprt_wait_for_buffer_space(rqst->rq_xprt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 907464c2a9f0..bed57d8b5c19 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -261,7 +261,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
261 .buf_alloc = xprt_rdma_bc_allocate, 261 .buf_alloc = xprt_rdma_bc_allocate,
262 .buf_free = xprt_rdma_bc_free, 262 .buf_free = xprt_rdma_bc_free,
263 .send_request = xprt_rdma_bc_send_request, 263 .send_request = xprt_rdma_bc_send_request,
264 .set_retrans_timeout = xprt_set_retrans_timeout_def, 264 .wait_for_reply_request = xprt_wait_for_reply_request_def,
265 .close = xprt_rdma_bc_close, 265 .close = xprt_rdma_bc_close,
266 .destroy = xprt_rdma_bc_put, 266 .destroy = xprt_rdma_bc_put,
267 .print_stats = xprt_rdma_print_stats 267 .print_stats = xprt_rdma_print_stats
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 5d261353bd90..1f73a6a7e43c 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -68,9 +68,9 @@
68 * tunables 68 * tunables
69 */ 69 */
70 70
71static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; 71unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
72unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 72unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
73static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 73unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
74unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; 74unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
75int xprt_rdma_pad_optimize; 75int xprt_rdma_pad_optimize;
76 76
@@ -288,7 +288,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
288 288
289 cancel_delayed_work_sync(&r_xprt->rx_connect_worker); 289 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
290 290
291 rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); 291 rpcrdma_ep_destroy(r_xprt);
292 rpcrdma_buffer_destroy(&r_xprt->rx_buf); 292 rpcrdma_buffer_destroy(&r_xprt->rx_buf);
293 rpcrdma_ia_close(&r_xprt->rx_ia); 293 rpcrdma_ia_close(&r_xprt->rx_ia);
294 294
@@ -311,10 +311,8 @@ static const struct rpc_timeout xprt_rdma_default_timeout = {
311static struct rpc_xprt * 311static struct rpc_xprt *
312xprt_setup_rdma(struct xprt_create *args) 312xprt_setup_rdma(struct xprt_create *args)
313{ 313{
314 struct rpcrdma_create_data_internal cdata;
315 struct rpc_xprt *xprt; 314 struct rpc_xprt *xprt;
316 struct rpcrdma_xprt *new_xprt; 315 struct rpcrdma_xprt *new_xprt;
317 struct rpcrdma_ep *new_ep;
318 struct sockaddr *sap; 316 struct sockaddr *sap;
319 int rc; 317 int rc;
320 318
@@ -349,40 +347,12 @@ xprt_setup_rdma(struct xprt_create *args)
349 xprt_set_bound(xprt); 347 xprt_set_bound(xprt);
350 xprt_rdma_format_addresses(xprt, sap); 348 xprt_rdma_format_addresses(xprt, sap);
351 349
352 cdata.max_requests = xprt_rdma_slot_table_entries;
353
354 cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
355 cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
356
357 cdata.inline_wsize = xprt_rdma_max_inline_write;
358 if (cdata.inline_wsize > cdata.wsize)
359 cdata.inline_wsize = cdata.wsize;
360
361 cdata.inline_rsize = xprt_rdma_max_inline_read;
362 if (cdata.inline_rsize > cdata.rsize)
363 cdata.inline_rsize = cdata.rsize;
364
365 /*
366 * Create new transport instance, which includes initialized
367 * o ia
368 * o endpoint
369 * o buffers
370 */
371
372 new_xprt = rpcx_to_rdmax(xprt); 350 new_xprt = rpcx_to_rdmax(xprt);
373
374 rc = rpcrdma_ia_open(new_xprt); 351 rc = rpcrdma_ia_open(new_xprt);
375 if (rc) 352 if (rc)
376 goto out1; 353 goto out1;
377 354
378 /* 355 rc = rpcrdma_ep_create(new_xprt);
379 * initialize and create ep
380 */
381 new_xprt->rx_data = cdata;
382 new_ep = &new_xprt->rx_ep;
383
384 rc = rpcrdma_ep_create(&new_xprt->rx_ep,
385 &new_xprt->rx_ia, &new_xprt->rx_data);
386 if (rc) 356 if (rc)
387 goto out2; 357 goto out2;
388 358
@@ -413,7 +383,7 @@ out4:
413 rpcrdma_buffer_destroy(&new_xprt->rx_buf); 383 rpcrdma_buffer_destroy(&new_xprt->rx_buf);
414 rc = -ENODEV; 384 rc = -ENODEV;
415out3: 385out3:
416 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); 386 rpcrdma_ep_destroy(new_xprt);
417out2: 387out2:
418 rpcrdma_ia_close(&new_xprt->rx_ia); 388 rpcrdma_ia_close(&new_xprt->rx_ia);
419out1: 389out1:
@@ -585,52 +555,15 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
585 rpc_wake_up_next(&xprt->backlog); 555 rpc_wake_up_next(&xprt->backlog);
586} 556}
587 557
588static bool 558static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
589rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 559 struct rpcrdma_regbuf *rb, size_t size,
590 size_t size, gfp_t flags) 560 gfp_t flags)
591{ 561{
592 struct rpcrdma_regbuf *rb; 562 if (unlikely(rdmab_length(rb) < size)) {
593 563 if (!rpcrdma_regbuf_realloc(rb, size, flags))
594 if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) 564 return false;
595 return true; 565 r_xprt->rx_stats.hardway_register_count += size;
596 566 }
597 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
598 if (IS_ERR(rb))
599 return false;
600
601 rpcrdma_free_regbuf(req->rl_sendbuf);
602 r_xprt->rx_stats.hardway_register_count += size;
603 req->rl_sendbuf = rb;
604 return true;
605}
606
607/* The rq_rcv_buf is used only if a Reply chunk is necessary.
608 * The decision to use a Reply chunk is made later in
609 * rpcrdma_marshal_req. This buffer is registered at that time.
610 *
611 * Otherwise, the associated RPC Reply arrives in a separate
612 * Receive buffer, arbitrarily chosen by the HCA. The buffer
613 * allocated here for the RPC Reply is not utilized in that
614 * case. See rpcrdma_inline_fixup.
615 *
616 * A regbuf is used here to remember the buffer size.
617 */
618static bool
619rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
620 size_t size, gfp_t flags)
621{
622 struct rpcrdma_regbuf *rb;
623
624 if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
625 return true;
626
627 rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
628 if (IS_ERR(rb))
629 return false;
630
631 rpcrdma_free_regbuf(req->rl_recvbuf);
632 r_xprt->rx_stats.hardway_register_count += size;
633 req->rl_recvbuf = rb;
634 return true; 567 return true;
635} 568}
636 569
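[Annotation] rpcrdma_check_regbuf() folds the old get_sendbuf/get_recvbuf pair into one grow-only check: reallocate just when the existing regbuf is too small, and charge the new bytes to hardway_register_count. The same check-then-grow shape in a self-contained userspace sketch (names invented for illustration):

	#include <stdlib.h>

	struct buf {
		void   *data;
		size_t  len;
	};

	/* Grow @b to at least @size bytes; on failure the old buffer
	 * is left in place, so the caller can simply fail the request.
	 */
	int buf_check_size(struct buf *b, size_t size)
	{
		void *p;

		if (b->len >= size)
			return 0;	/* common case: big enough already */

		p = malloc(size);
		if (!p)
			return -1;	/* @b untouched */

		free(b->data);
		b->data = p;
		b->len = size;
		return 0;	/* the kernel version also bumps the stats here */
	}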
@@ -655,13 +588,15 @@ xprt_rdma_allocate(struct rpc_task *task)
655 if (RPC_IS_SWAPPER(task)) 588 if (RPC_IS_SWAPPER(task))
656 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; 589 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
657 590
658 if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) 591 if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
592 flags))
659 goto out_fail; 593 goto out_fail;
660 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 594 if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
595 flags))
661 goto out_fail; 596 goto out_fail;
662 597
663 rqst->rq_buffer = req->rl_sendbuf->rg_base; 598 rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
664 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 599 rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
665 trace_xprtrdma_op_allocate(task, req); 600 trace_xprtrdma_op_allocate(task, req);
666 return 0; 601 return 0;
667 602
@@ -815,7 +750,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
815 .alloc_slot = xprt_rdma_alloc_slot, 750 .alloc_slot = xprt_rdma_alloc_slot,
816 .free_slot = xprt_rdma_free_slot, 751 .free_slot = xprt_rdma_free_slot,
817 .release_request = xprt_release_rqst_cong, /* ditto */ 752 .release_request = xprt_release_rqst_cong, /* ditto */
818 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ 753 .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */
819 .timer = xprt_rdma_timer, 754 .timer = xprt_rdma_timer,
820 .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ 755 .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
821 .set_port = xprt_rdma_set_port, 756 .set_port = xprt_rdma_set_port,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 30cfc0efe699..bef5eac8ab38 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -76,11 +76,16 @@
76static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 76static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
77static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 77static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
78static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); 78static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
79static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); 79static struct rpcrdma_regbuf *
80static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 80rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
81 gfp_t flags);
82static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
83static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
81static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); 84static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
82 85
83/* Wait for outstanding transport work to finish. 86/* Wait for outstanding transport work to finish. ib_drain_qp
87 * handles the drains in the wrong order for us, so open code
88 * them here.
84 */ 89 */
85static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) 90static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
86{ 91{
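[Annotation] The expanded comment is about ordering: ib_drain_qp() drains the Send Queue before the Receive Queue, but xprtrdma wants Receives flushed first so that deferred Reply processing, which can itself post Send work such as local invalidations, finishes before the SQ drain. A sketch of that open-coded order, using the in-kernel ib_drain_rq()/ib_drain_sq() helpers; the body below is illustrative rather than a verbatim copy of the function:

	static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
	{
		struct rdma_cm_id *id = r_xprt->rx_ia.ri_id;

		/* Flush Receives first, then wait for deferred Reply
		 * handling to complete.
		 */
		ib_drain_rq(id->qp);

		/* Deferred Reply processing may have posted more Send
		 * work (e.g. local invalidations); flush it last.
		 */
		ib_drain_sq(id->qp);
	}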
@@ -132,11 +137,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
132 137
133 /* WARNING: Only wr_cqe and status are reliable at this point */ 138 /* WARNING: Only wr_cqe and status are reliable at this point */
134 trace_xprtrdma_wc_send(sc, wc); 139 trace_xprtrdma_wc_send(sc, wc);
135 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
136 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
137 ib_wc_status_msg(wc->status),
138 wc->status, wc->vendor_err);
139
140 rpcrdma_sendctx_put_locked(sc); 140 rpcrdma_sendctx_put_locked(sc);
141} 141}
142 142
@@ -174,10 +174,6 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
174 return; 174 return;
175 175
176out_flushed: 176out_flushed:
177 if (wc->status != IB_WC_WR_FLUSH_ERR)
178 pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
179 ib_wc_status_msg(wc->status),
180 wc->status, wc->vendor_err);
181 rpcrdma_recv_buffer_put(rep); 177 rpcrdma_recv_buffer_put(rep);
182} 178}
183 179
@@ -185,7 +181,6 @@ static void
185rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, 181rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
186 struct rdma_conn_param *param) 182 struct rdma_conn_param *param)
187{ 183{
188 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
189 const struct rpcrdma_connect_private *pmsg = param->private_data; 184 const struct rpcrdma_connect_private *pmsg = param->private_data;
190 unsigned int rsize, wsize; 185 unsigned int rsize, wsize;
191 186
@@ -202,12 +197,13 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
202 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 197 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
203 } 198 }
204 199
205 if (rsize < cdata->inline_rsize) 200 if (rsize < r_xprt->rx_ep.rep_inline_recv)
206 cdata->inline_rsize = rsize; 201 r_xprt->rx_ep.rep_inline_recv = rsize;
207 if (wsize < cdata->inline_wsize) 202 if (wsize < r_xprt->rx_ep.rep_inline_send)
208 cdata->inline_wsize = wsize; 203 r_xprt->rx_ep.rep_inline_send = wsize;
209 dprintk("RPC: %s: max send %u, max recv %u\n", 204 dprintk("RPC: %s: max send %u, max recv %u\n", __func__,
210 __func__, cdata->inline_wsize, cdata->inline_rsize); 205 r_xprt->rx_ep.rep_inline_send,
206 r_xprt->rx_ep.rep_inline_recv);
211 rpcrdma_set_max_header_sizes(r_xprt); 207 rpcrdma_set_max_header_sizes(r_xprt);
212} 208}
213 209
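[Annotation] The private-data exchange above can only shrink the endpoint's inline thresholds: what the peer can send caps this client's inline receive size, and what the peer can receive caps its inline send size. A minimal model of that seed-then-clamp pattern (all names invented for illustration):

	struct ep_sizes {
		unsigned int inline_send;	/* seeded from a tunable */
		unsigned int inline_recv;	/* seeded from a tunable */
	};

	/* Clamp the seeded sizes with the peer's advertisement; the
	 * negotiation can only shrink them, never grow them.
	 */
	void ep_negotiate(struct ep_sizes *ep,
			  unsigned int peer_send, unsigned int peer_recv)
	{
		if (peer_send < ep->inline_recv)
			ep->inline_recv = peer_send;
		if (peer_recv < ep->inline_send)
			ep->inline_send = peer_recv;
	}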
@@ -247,7 +243,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
247 case RDMA_CM_EVENT_DEVICE_REMOVAL: 243 case RDMA_CM_EVENT_DEVICE_REMOVAL:
248#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 244#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
249 pr_info("rpcrdma: removing device %s for %s:%s\n", 245 pr_info("rpcrdma: removing device %s for %s:%s\n",
250 ia->ri_device->name, 246 ia->ri_id->device->name,
251 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); 247 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
252#endif 248#endif
253 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); 249 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@@ -256,7 +252,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
256 wait_for_completion(&ia->ri_remove_done); 252 wait_for_completion(&ia->ri_remove_done);
257 253
258 ia->ri_id = NULL; 254 ia->ri_id = NULL;
259 ia->ri_device = NULL;
260 /* Return 1 to ensure the core destroys the id. */ 255 /* Return 1 to ensure the core destroys the id. */
261 return 1; 256 return 1;
262 case RDMA_CM_EVENT_ESTABLISHED: 257 case RDMA_CM_EVENT_ESTABLISHED:
@@ -291,7 +286,7 @@ disconnected:
291 286
292 dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, 287 dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
293 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), 288 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
294 ia->ri_device->name, rdma_event_msg(event->event)); 289 ia->ri_id->device->name, rdma_event_msg(event->event));
295 return 0; 290 return 0;
296} 291}
297 292
@@ -370,9 +365,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
370 rc = PTR_ERR(ia->ri_id); 365 rc = PTR_ERR(ia->ri_id);
371 goto out_err; 366 goto out_err;
372 } 367 }
373 ia->ri_device = ia->ri_id->device;
374 368
375 ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); 369 ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
376 if (IS_ERR(ia->ri_pd)) { 370 if (IS_ERR(ia->ri_pd)) {
377 rc = PTR_ERR(ia->ri_pd); 371 rc = PTR_ERR(ia->ri_pd);
378 pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); 372 pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@ -381,12 +375,12 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
381 375
382 switch (xprt_rdma_memreg_strategy) { 376 switch (xprt_rdma_memreg_strategy) {
383 case RPCRDMA_FRWR: 377 case RPCRDMA_FRWR:
384 if (frwr_is_supported(ia)) 378 if (frwr_is_supported(ia->ri_id->device))
385 break; 379 break;
386 /*FALLTHROUGH*/ 380 /*FALLTHROUGH*/
387 default: 381 default:
388 pr_err("rpcrdma: Device %s does not support memreg mode %d\n", 382 pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
389 ia->ri_device->name, xprt_rdma_memreg_strategy); 383 ia->ri_id->device->name, xprt_rdma_memreg_strategy);
390 rc = -EINVAL; 384 rc = -EINVAL;
391 goto out_err; 385 goto out_err;
392 } 386 }
@@ -438,11 +432,11 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
438 * mappings and MRs are gone. 432 * mappings and MRs are gone.
439 */ 433 */
440 list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list) 434 list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
441 rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf); 435 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
442 list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 436 list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
443 rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf); 437 rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
444 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); 438 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
445 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); 439 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
446 } 440 }
447 rpcrdma_mrs_destroy(buf); 441 rpcrdma_mrs_destroy(buf);
448 ib_dealloc_pd(ia->ri_pd); 442 ib_dealloc_pd(ia->ri_pd);
@@ -468,7 +462,6 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
468 rdma_destroy_id(ia->ri_id); 462 rdma_destroy_id(ia->ri_id);
469 } 463 }
470 ia->ri_id = NULL; 464 ia->ri_id = NULL;
471 ia->ri_device = NULL;
472 465
473 /* If the pd is still busy, xprtrdma missed freeing a resource */ 466 /* If the pd is still busy, xprtrdma missed freeing a resource */
474 if (ia->ri_pd && !IS_ERR(ia->ri_pd)) 467 if (ia->ri_pd && !IS_ERR(ia->ri_pd))
@@ -476,19 +469,26 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
476 ia->ri_pd = NULL; 469 ia->ri_pd = NULL;
477} 470}
478 471
479/* 472/**
480 * Create unconnected endpoint. 473 * rpcrdma_ep_create - Create unconnected endpoint
474 * @r_xprt: transport to instantiate
475 *
476 * Returns zero on success, or a negative errno.
481 */ 477 */
482int 478int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
483rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
484 struct rpcrdma_create_data_internal *cdata)
485{ 479{
480 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
481 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
486 struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; 482 struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
487 struct ib_cq *sendcq, *recvcq; 483 struct ib_cq *sendcq, *recvcq;
488 unsigned int max_sge; 484 unsigned int max_sge;
489 int rc; 485 int rc;
490 486
491 max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge, 487 ep->rep_max_requests = xprt_rdma_slot_table_entries;
488 ep->rep_inline_send = xprt_rdma_max_inline_write;
489 ep->rep_inline_recv = xprt_rdma_max_inline_read;
490
491 max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge,
492 RPCRDMA_MAX_SEND_SGES); 492 RPCRDMA_MAX_SEND_SGES);
493 if (max_sge < RPCRDMA_MIN_SEND_SGES) { 493 if (max_sge < RPCRDMA_MIN_SEND_SGES) {
494 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); 494 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@@ -496,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
496 } 496 }
497 ia->ri_max_send_sges = max_sge; 497 ia->ri_max_send_sges = max_sge;
498 498
499 rc = frwr_open(ia, ep, cdata); 499 rc = frwr_open(ia, ep);
500 if (rc) 500 if (rc)
501 return rc; 501 return rc;
502 502
@@ -518,23 +518,21 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
518 ep->rep_attr.cap.max_send_sge, 518 ep->rep_attr.cap.max_send_sge,
519 ep->rep_attr.cap.max_recv_sge); 519 ep->rep_attr.cap.max_recv_sge);
520 520
521 /* set trigger for requesting send completion */ 521 ep->rep_send_batch = ep->rep_max_requests >> 3;
522 ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
523 cdata->max_requests >> 2);
524 ep->rep_send_count = ep->rep_send_batch; 522 ep->rep_send_count = ep->rep_send_batch;
525 init_waitqueue_head(&ep->rep_connect_wait); 523 init_waitqueue_head(&ep->rep_connect_wait);
526 ep->rep_receive_count = 0; 524 ep->rep_receive_count = 0;
527 525
528 sendcq = ib_alloc_cq(ia->ri_device, NULL, 526 sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
529 ep->rep_attr.cap.max_send_wr + 1, 527 ep->rep_attr.cap.max_send_wr + 1,
530 ia->ri_device->num_comp_vectors > 1 ? 1 : 0, 528 ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
531 IB_POLL_WORKQUEUE); 529 IB_POLL_WORKQUEUE);
532 if (IS_ERR(sendcq)) { 530 if (IS_ERR(sendcq)) {
533 rc = PTR_ERR(sendcq); 531 rc = PTR_ERR(sendcq);
534 goto out1; 532 goto out1;
535 } 533 }
536 534
537 recvcq = ib_alloc_cq(ia->ri_device, NULL, 535 recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
538 ep->rep_attr.cap.max_recv_wr + 1, 536 ep->rep_attr.cap.max_recv_wr + 1,
539 0, IB_POLL_WORKQUEUE); 537 0, IB_POLL_WORKQUEUE);
540 if (IS_ERR(recvcq)) { 538 if (IS_ERR(recvcq)) {
@@ -552,15 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
552 pmsg->cp_magic = rpcrdma_cmp_magic; 550 pmsg->cp_magic = rpcrdma_cmp_magic;
553 pmsg->cp_version = RPCRDMA_CMP_VERSION; 551 pmsg->cp_version = RPCRDMA_CMP_VERSION;
554 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; 552 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
555 pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); 553 pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send);
556 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); 554 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv);
557 ep->rep_remote_cma.private_data = pmsg; 555 ep->rep_remote_cma.private_data = pmsg;
558 ep->rep_remote_cma.private_data_len = sizeof(*pmsg); 556 ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
559 557
560 /* Client offers RDMA Read but does not initiate */ 558 /* Client offers RDMA Read but does not initiate */
561 ep->rep_remote_cma.initiator_depth = 0; 559 ep->rep_remote_cma.initiator_depth = 0;
562 ep->rep_remote_cma.responder_resources = 560 ep->rep_remote_cma.responder_resources =
563 min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom); 561 min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom);
564 562
565 /* Limit transport retries so client can detect server 563 /* Limit transport retries so client can detect server
566 * GID changes quickly. RPC layer handles re-establishing 564 * GID changes quickly. RPC layer handles re-establishing
@@ -583,16 +581,16 @@ out1:
583 return rc; 581 return rc;
584} 582}
585 583
586/* 584/**
587 * rpcrdma_ep_destroy 585 * rpcrdma_ep_destroy - Disconnect and destroy endpoint.
586 * @r_xprt: transport instance to shut down
588 * 587 *
589 * Disconnect and destroy endpoint. After this, the only
590 * valid operations on the ep are to free it (if dynamically
591 * allocated) or re-create it.
592 */ 588 */
593void 589void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
594rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
595{ 590{
591 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
592 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
593
596 if (ia->ri_id && ia->ri_id->qp) { 594 if (ia->ri_id && ia->ri_id->qp) {
597 rpcrdma_ep_disconnect(ep, ia); 595 rpcrdma_ep_disconnect(ep, ia);
598 rdma_destroy_qp(ia->ri_id); 596 rdma_destroy_qp(ia->ri_id);
@@ -622,7 +620,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
622 goto out1; 620 goto out1;
623 621
624 rc = -ENOMEM; 622 rc = -ENOMEM;
625 err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data); 623 err = rpcrdma_ep_create(r_xprt);
626 if (err) { 624 if (err) {
627 pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); 625 pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
628 goto out2; 626 goto out2;
@@ -639,7 +637,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
639 return 0; 637 return 0;
640 638
641out3: 639out3:
642 rpcrdma_ep_destroy(ep, ia); 640 rpcrdma_ep_destroy(r_xprt);
643out2: 641out2:
644 rpcrdma_ia_close(ia); 642 rpcrdma_ia_close(ia);
645out1: 643out1:
@@ -672,7 +670,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
672 */ 670 */
673 old = id; 671 old = id;
674 rc = -ENETUNREACH; 672 rc = -ENETUNREACH;
675 if (ia->ri_device != id->device) { 673 if (ia->ri_id->device != id->device) {
676 pr_err("rpcrdma: can't reconnect on different device!\n"); 674 pr_err("rpcrdma: can't reconnect on different device!\n");
677 goto out_destroy; 675 goto out_destroy;
678 } 676 }
@@ -796,8 +794,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
796 */ 794 */
797 795
798/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced 796/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
799 * queue activity, and ib_drain_qp has flushed all remaining Send 797 * queue activity, and rpcrdma_xprt_drain has flushed all remaining
800 * requests. 798 * Send requests.
801 */ 799 */
802static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) 800static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
803{ 801{
@@ -867,20 +865,20 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
867 865
868/** 866/**
869 * rpcrdma_sendctx_get_locked - Acquire a send context 867 * rpcrdma_sendctx_get_locked - Acquire a send context
870 * @buf: transport buffers from which to acquire an unused context 868 * @r_xprt: controlling transport instance
871 * 869 *
872 * Returns pointer to a free send completion context; or NULL if 870 * Returns pointer to a free send completion context; or NULL if
873 * the queue is empty. 871 * the queue is empty.
874 * 872 *
875 * Usage: Called to acquire an SGE array before preparing a Send WR. 873 * Usage: Called to acquire an SGE array before preparing a Send WR.
876 * 874 *
877 * The caller serializes calls to this function (per rpcrdma_buffer), 875 * The caller serializes calls to this function (per transport), and
878 * and provides an effective memory barrier that flushes the new value 876 * provides an effective memory barrier that flushes the new value
879 * of rb_sc_head. 877 * of rb_sc_head.
880 */ 878 */
881struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf) 879struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
882{ 880{
883 struct rpcrdma_xprt *r_xprt; 881 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
884 struct rpcrdma_sendctx *sc; 882 struct rpcrdma_sendctx *sc;
885 unsigned long next_head; 883 unsigned long next_head;
886 884
@@ -905,7 +903,6 @@ out_emptyq:
905 * backing up. Cause the caller to pause and try again. 903 * backing up. Cause the caller to pause and try again.
906 */ 904 */
907 set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); 905 set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
908 r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
909 r_xprt->rx_stats.empty_sendctx_q++; 906 r_xprt->rx_stats.empty_sendctx_q++;
910 return NULL; 907 return NULL;
911} 908}
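[Annotation] rpcrdma_sendctx_get_locked() treats rb_sc_ctxs[] as a ring: the sender side advances the head on get, the completion side advances the tail on put, and "empty" means the head would catch the tail. Each side is serialized by its caller, so no lock is shown. A compact userspace model of the get side (invented names):

	#include <stddef.h>

	#define RING_SLOTS 8	/* power of two keeps the wrap a mask */

	struct ring {
		unsigned long head;	/* advanced when sending */
		unsigned long tail;	/* advanced at Send completion */
		void *slot[RING_SLOTS];
	};

	static unsigned long ring_next(unsigned long i)
	{
		return (i + 1) & (RING_SLOTS - 1);
	}

	/* Hand out the next free context, or NULL when head would
	 * catch up with tail -- the shape of the function above.
	 */
	void *ring_get(struct ring *r)
	{
		unsigned long next_head = ring_next(r->head);
		void *item;

		if (next_head == r->tail)
			return NULL;	/* empty: caller pauses, then retries */

		/* ORDER: read the item before publishing the new head */
		item = r->slot[next_head];
		r->head = next_head;
		return item;
	}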
@@ -917,7 +914,7 @@ out_emptyq:
917 * Usage: Called from Send completion to return a sendctxt 914 * Usage: Called from Send completion to return a sendctxt
918 * to the queue. 915 * to the queue.
919 * 916 *
920 * The caller serializes calls to this function (per rpcrdma_buffer). 917 * The caller serializes calls to this function (per transport).
921 */ 918 */
922static void 919static void
923rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) 920rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
@@ -925,7 +922,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
925 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; 922 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
926 unsigned long next_tail; 923 unsigned long next_tail;
927 924
928 /* Unmap SGEs of previously completed by unsignaled 925 /* Unmap SGEs of previously completed but unsignaled
929 * Sends by walking up the queue until @sc is found. 926 * Sends by walking up the queue until @sc is found.
930 */ 927 */
931 next_tail = buf->rb_sc_tail; 928 next_tail = buf->rb_sc_tail;
@@ -933,7 +930,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
933 next_tail = rpcrdma_sendctx_next(buf, next_tail); 930 next_tail = rpcrdma_sendctx_next(buf, next_tail);
934 931
935 /* ORDER: item must be accessed _before_ tail is updated */ 932 /* ORDER: item must be accessed _before_ tail is updated */
936 rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]); 933 rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
937 934
938 } while (buf->rb_sc_ctxs[next_tail] != sc); 935 } while (buf->rb_sc_ctxs[next_tail] != sc);
939 936
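[Annotation] The put side retires contexts in order: it walks the tail forward, unmapping the SGEs of Sends that completed without a signaled completion, until it reaches the context named by the current completion. Continuing the userspace ring model from the sketch above:

	/* Retire every slot up to and including @sc, calling @unmap on
	 * each -- the tail walk in rpcrdma_sendctx_put_locked() that
	 * cleans up earlier Sends that completed unsignaled.
	 */
	void ring_put(struct ring *r, void *sc, void (*unmap)(void *))
	{
		unsigned long next_tail = r->tail;

		do {
			next_tail = ring_next(next_tail);

			/* ORDER: use the item before publishing the new tail */
			unmap(r->slot[next_tail]);
		} while (r->slot[next_tail] != sc);

		r->tail = next_tail;
	}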
@@ -996,54 +993,70 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
996 rpcrdma_mrs_create(r_xprt); 993 rpcrdma_mrs_create(r_xprt);
997} 994}
998 995
999struct rpcrdma_req * 996/**
1000rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) 997 * rpcrdma_req_create - Allocate an rpcrdma_req object
998 * @r_xprt: controlling r_xprt
999 * @size: initial size, in bytes, of send and receive buffers
1000 * @flags: GFP flags passed to memory allocators
1001 *
1002 * Returns an allocated and fully initialized rpcrdma_req or NULL.
1003 */
1004struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
1005 gfp_t flags)
1001{ 1006{
1002 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 1007 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
1003 struct rpcrdma_regbuf *rb; 1008 struct rpcrdma_regbuf *rb;
1004 struct rpcrdma_req *req; 1009 struct rpcrdma_req *req;
1005 1010
1006 req = kzalloc(sizeof(*req), GFP_KERNEL); 1011 req = kzalloc(sizeof(*req), flags);
1007 if (req == NULL) 1012 if (req == NULL)
1008 return ERR_PTR(-ENOMEM); 1013 goto out1;
1009 1014
1010 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, 1015 rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags);
1011 DMA_TO_DEVICE, GFP_KERNEL); 1016 if (!rb)
1012 if (IS_ERR(rb)) { 1017 goto out2;
1013 kfree(req);
1014 return ERR_PTR(-ENOMEM);
1015 }
1016 req->rl_rdmabuf = rb; 1018 req->rl_rdmabuf = rb;
1017 xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); 1019 xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
1020
1021 req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
1022 if (!req->rl_sendbuf)
1023 goto out3;
1024
1025 req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
1026 if (!req->rl_recvbuf)
1027 goto out4;
1028
1018 req->rl_buffer = buffer; 1029 req->rl_buffer = buffer;
1019 INIT_LIST_HEAD(&req->rl_registered); 1030 INIT_LIST_HEAD(&req->rl_registered);
1020
1021 spin_lock(&buffer->rb_lock); 1031 spin_lock(&buffer->rb_lock);
1022 list_add(&req->rl_all, &buffer->rb_allreqs); 1032 list_add(&req->rl_all, &buffer->rb_allreqs);
1023 spin_unlock(&buffer->rb_lock); 1033 spin_unlock(&buffer->rb_lock);
1024 return req; 1034 return req;
1035
1036out4:
1037 kfree(req->rl_sendbuf);
1038out3:
1039 kfree(req->rl_rdmabuf);
1040out2:
1041 kfree(req);
1042out1:
1043 return NULL;
1025} 1044}
1026 1045
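[Annotation] rpcrdma_req_create() unwinds with the classic reverse-order goto ladder, and now reports failure as NULL rather than an ERR_PTR. A compilable userspace sketch of the same ladder (names invented):

	#include <stdlib.h>

	struct req {
		void *hdrbuf;
		void *sendbuf;
		void *recvbuf;
	};

	/* Allocate a req and its three buffers; on any failure, unwind
	 * exactly the allocations made so far, in reverse order.
	 */
	struct req *req_create(size_t size)
	{
		struct req *req;

		req = calloc(1, sizeof(*req));
		if (!req)
			goto out1;
		req->hdrbuf = malloc(128);
		if (!req->hdrbuf)
			goto out2;
		req->sendbuf = malloc(size);
		if (!req->sendbuf)
			goto out3;
		req->recvbuf = malloc(size);
		if (!req->recvbuf)
			goto out4;
		return req;

	out4:
		free(req->sendbuf);
	out3:
		free(req->hdrbuf);
	out2:
		free(req);
	out1:
		return NULL;
	}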
1027static int 1046static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
1028rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
1029{ 1047{
1030 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1031 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1048 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1032 struct rpcrdma_rep *rep; 1049 struct rpcrdma_rep *rep;
1033 int rc;
1034 1050
1035 rc = -ENOMEM;
1036 rep = kzalloc(sizeof(*rep), GFP_KERNEL); 1051 rep = kzalloc(sizeof(*rep), GFP_KERNEL);
1037 if (rep == NULL) 1052 if (rep == NULL)
1038 goto out; 1053 goto out;
1039 1054
1040 rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, 1055 rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv,
1041 DMA_FROM_DEVICE, GFP_KERNEL); 1056 DMA_FROM_DEVICE, GFP_KERNEL);
1042 if (IS_ERR(rep->rr_rdmabuf)) { 1057 if (!rep->rr_rdmabuf)
1043 rc = PTR_ERR(rep->rr_rdmabuf);
1044 goto out_free; 1058 goto out_free;
1045 } 1059 xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
1046 xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
1047 rdmab_length(rep->rr_rdmabuf)); 1060 rdmab_length(rep->rr_rdmabuf));
1048 1061
1049 rep->rr_cqe.done = rpcrdma_wc_receive; 1062 rep->rr_cqe.done = rpcrdma_wc_receive;
@@ -1058,22 +1071,27 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
1058 spin_lock(&buf->rb_lock); 1071 spin_lock(&buf->rb_lock);
1059 list_add(&rep->rr_list, &buf->rb_recv_bufs); 1072 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1060 spin_unlock(&buf->rb_lock); 1073 spin_unlock(&buf->rb_lock);
1061 return 0; 1074 return true;
1062 1075
1063out_free: 1076out_free:
1064 kfree(rep); 1077 kfree(rep);
1065out: 1078out:
1066 return rc; 1079 return false;
1067} 1080}
1068 1081
1069int 1082/**
1070rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) 1083 * rpcrdma_buffer_create - Create initial set of req/rep objects
1084 * @r_xprt: transport instance to (re)initialize
1085 *
1086 * Returns zero on success, otherwise a negative errno.
1087 */
1088int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1071{ 1089{
1072 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1090 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1073 int i, rc; 1091 int i, rc;
1074 1092
1075 buf->rb_flags = 0; 1093 buf->rb_flags = 0;
1076 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1094 buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
1077 buf->rb_bc_srv_max_requests = 0; 1095 buf->rb_bc_srv_max_requests = 0;
1078 spin_lock_init(&buf->rb_mrlock); 1096 spin_lock_init(&buf->rb_mrlock);
1079 spin_lock_init(&buf->rb_lock); 1097 spin_lock_init(&buf->rb_lock);
@@ -1086,16 +1104,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1086 1104
1087 INIT_LIST_HEAD(&buf->rb_send_bufs); 1105 INIT_LIST_HEAD(&buf->rb_send_bufs);
1088 INIT_LIST_HEAD(&buf->rb_allreqs); 1106 INIT_LIST_HEAD(&buf->rb_allreqs);
1107
1108 rc = -ENOMEM;
1089 for (i = 0; i < buf->rb_max_requests; i++) { 1109 for (i = 0; i < buf->rb_max_requests; i++) {
1090 struct rpcrdma_req *req; 1110 struct rpcrdma_req *req;
1091 1111
1092 req = rpcrdma_create_req(r_xprt); 1112 req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE,
1093 if (IS_ERR(req)) { 1113 GFP_KERNEL);
1094 dprintk("RPC: %s: request buffer %d alloc" 1114 if (!req)
1095 " failed\n", __func__, i);
1096 rc = PTR_ERR(req);
1097 goto out; 1115 goto out;
1098 }
1099 list_add(&req->rl_list, &buf->rb_send_bufs); 1116 list_add(&req->rl_list, &buf->rb_send_bufs);
1100 } 1117 }
1101 1118
@@ -1121,10 +1138,9 @@ out:
1121 return rc; 1138 return rc;
1122} 1139}
1123 1140
1124static void 1141static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
1125rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
1126{ 1142{
1127 rpcrdma_free_regbuf(rep->rr_rdmabuf); 1143 rpcrdma_regbuf_free(rep->rr_rdmabuf);
1128 kfree(rep); 1144 kfree(rep);
1129} 1145}
1130 1146
@@ -1140,9 +1156,9 @@ rpcrdma_req_destroy(struct rpcrdma_req *req)
1140{ 1156{
1141 list_del(&req->rl_all); 1157 list_del(&req->rl_all);
1142 1158
1143 rpcrdma_free_regbuf(req->rl_recvbuf); 1159 rpcrdma_regbuf_free(req->rl_recvbuf);
1144 rpcrdma_free_regbuf(req->rl_sendbuf); 1160 rpcrdma_regbuf_free(req->rl_sendbuf);
1145 rpcrdma_free_regbuf(req->rl_rdmabuf); 1161 rpcrdma_regbuf_free(req->rl_rdmabuf);
1146 kfree(req); 1162 kfree(req);
1147} 1163}
1148 1164
@@ -1180,7 +1196,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1180 * rpcrdma_buffer_destroy - Release all hw resources 1196 * rpcrdma_buffer_destroy - Release all hw resources
1181 * @buf: root control block for resources 1197 * @buf: root control block for resources
1182 * 1198 *
1183 * ORDERING: relies on a prior ib_drain_qp : 1199 * ORDERING: relies on a prior rpcrdma_xprt_drain :
1184 * - No more Send or Receive completions can occur 1200 * - No more Send or Receive completions can occur
1185 * - All MRs, reps, and reqs are returned to their free lists 1201 * - All MRs, reps, and reqs are returned to their free lists
1186 */ 1202 */
@@ -1202,7 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1202 rep = list_first_entry(&buf->rb_recv_bufs, 1218 rep = list_first_entry(&buf->rb_recv_bufs,
1203 struct rpcrdma_rep, rr_list); 1219 struct rpcrdma_rep, rr_list);
1204 list_del(&rep->rr_list); 1220 list_del(&rep->rr_list);
1205 rpcrdma_destroy_rep(rep); 1221 rpcrdma_rep_destroy(rep);
1206 } 1222 }
1207 1223
1208 while (!list_empty(&buf->rb_send_bufs)) { 1224 while (!list_empty(&buf->rb_send_bufs)) {
@@ -1281,7 +1297,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1281 1297
1282 if (mr->mr_dir != DMA_NONE) { 1298 if (mr->mr_dir != DMA_NONE) {
1283 trace_xprtrdma_mr_unmap(mr); 1299 trace_xprtrdma_mr_unmap(mr);
1284 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 1300 ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
1285 mr->mr_sg, mr->mr_nents, mr->mr_dir); 1301 mr->mr_sg, mr->mr_nents, mr->mr_dir);
1286 mr->mr_dir = DMA_NONE; 1302 mr->mr_dir = DMA_NONE;
1287 } 1303 }
@@ -1331,7 +1347,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1331 } 1347 }
1332 spin_unlock(&buffers->rb_lock); 1348 spin_unlock(&buffers->rb_lock);
1333 if (rep) 1349 if (rep)
1334 rpcrdma_destroy_rep(rep); 1350 rpcrdma_rep_destroy(rep);
1335} 1351}
1336 1352
1337/* 1353/*
@@ -1348,69 +1364,90 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1348 list_add(&rep->rr_list, &buffers->rb_recv_bufs); 1364 list_add(&rep->rr_list, &buffers->rb_recv_bufs);
1349 spin_unlock(&buffers->rb_lock); 1365 spin_unlock(&buffers->rb_lock);
1350 } else { 1366 } else {
1351 rpcrdma_destroy_rep(rep); 1367 rpcrdma_rep_destroy(rep);
1352 } 1368 }
1353} 1369}
1354 1370
1355/** 1371/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
1356 * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
1357 * @size: size of buffer to be allocated, in bytes
1358 * @direction: direction of data movement
1359 * @flags: GFP flags
1360 *
1361 * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
1362 * can be persistently DMA-mapped for I/O.
1363 * 1372 *
1364 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for 1373 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
1365 * receiving the payload of RDMA RECV operations. During Long Calls 1374 * receiving the payload of RDMA RECV operations. During Long Calls
1366 * or Replies they may be registered externally via frwr_map. 1375 * or Replies they may be registered externally via frwr_map.
1367 */ 1376 */
1368struct rpcrdma_regbuf * 1377static struct rpcrdma_regbuf *
1369rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, 1378rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
1370 gfp_t flags) 1379 gfp_t flags)
1371{ 1380{
1372 struct rpcrdma_regbuf *rb; 1381 struct rpcrdma_regbuf *rb;
1373 1382
1374 rb = kmalloc(sizeof(*rb) + size, flags); 1383 rb = kmalloc(sizeof(*rb), flags);
1375 if (rb == NULL) 1384 if (!rb)
1376 return ERR_PTR(-ENOMEM); 1385 return NULL;
1386 rb->rg_data = kmalloc(size, flags);
1387 if (!rb->rg_data) {
1388 kfree(rb);
1389 return NULL;
1390 }
1377 1391
1378 rb->rg_device = NULL; 1392 rb->rg_device = NULL;
1379 rb->rg_direction = direction; 1393 rb->rg_direction = direction;
1380 rb->rg_iov.length = size; 1394 rb->rg_iov.length = size;
1381
1382 return rb; 1395 return rb;
1383} 1396}
1384 1397
1385/** 1398/**
1386 * __rpcrdma_map_regbuf - DMA-map a regbuf 1399 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
1387 * @ia: controlling rpcrdma_ia 1400 * @rb: regbuf to reallocate
1401 * @size: size of buffer to be allocated, in bytes
1402 * @flags: GFP flags
1403 *
1404 * Returns true if reallocation was successful. If false is
1405 * returned, @rb is left untouched.
1406 */
1407bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
1408{
1409 void *buf;
1410
1411 buf = kmalloc(size, flags);
1412 if (!buf)
1413 return false;
1414
1415 rpcrdma_regbuf_dma_unmap(rb);
1416 kfree(rb->rg_data);
1417
1418 rb->rg_data = buf;
1419 rb->rg_iov.length = size;
1420 return true;
1421}
1422
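[Annotation] The "left untouched on failure" guarantee comes from ordering: the new buffer is allocated before the old one is freed, so an allocation failure returns early with @rb intact. The same shape in a self-contained userspace sketch (invented names):

	#include <stdbool.h>
	#include <stdlib.h>

	struct regbuf {
		void   *data;
		size_t  len;
	};

	/* Allocate first, free second: if malloc() fails we return
	 * before touching @rb, which is the documented guarantee.
	 */
	bool regbuf_realloc(struct regbuf *rb, size_t size)
	{
		void *buf = malloc(size);

		if (!buf)
			return false;	/* @rb left untouched */

		free(rb->data);
		rb->data = buf;
		rb->len = size;
		return true;
	}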
1423/**
1424 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
1425 * @r_xprt: controlling transport instance
1388 * @rb: regbuf to be mapped 1426 * @rb: regbuf to be mapped
1427 *
1428 * Returns true if the buffer is now DMA mapped to @r_xprt's device
1389 */ 1429 */
1390bool 1430bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
1391__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) 1431 struct rpcrdma_regbuf *rb)
1392{ 1432{
1393 struct ib_device *device = ia->ri_device; 1433 struct ib_device *device = r_xprt->rx_ia.ri_id->device;
1394 1434
1395 if (rb->rg_direction == DMA_NONE) 1435 if (rb->rg_direction == DMA_NONE)
1396 return false; 1436 return false;
1397 1437
1398 rb->rg_iov.addr = ib_dma_map_single(device, 1438 rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
1399 (void *)rb->rg_base, 1439 rdmab_length(rb), rb->rg_direction);
1400 rdmab_length(rb),
1401 rb->rg_direction);
1402 if (ib_dma_mapping_error(device, rdmab_addr(rb))) { 1440 if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
1403 trace_xprtrdma_dma_maperr(rdmab_addr(rb)); 1441 trace_xprtrdma_dma_maperr(rdmab_addr(rb));
1404 return false; 1442 return false;
1405 } 1443 }
1406 1444
1407 rb->rg_device = device; 1445 rb->rg_device = device;
1408 rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; 1446 rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey;
1409 return true; 1447 return true;
1410} 1448}
1411 1449
1412static void 1450static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
1413rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
1414{ 1451{
1415 if (!rb) 1452 if (!rb)
1416 return; 1453 return;
@@ -1418,19 +1455,16 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
1418 if (!rpcrdma_regbuf_is_mapped(rb)) 1455 if (!rpcrdma_regbuf_is_mapped(rb))
1419 return; 1456 return;
1420 1457
1421 ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), 1458 ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
1422 rdmab_length(rb), rb->rg_direction); 1459 rb->rg_direction);
1423 rb->rg_device = NULL; 1460 rb->rg_device = NULL;
1424} 1461}
1425 1462
1426/** 1463static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
1427 * rpcrdma_free_regbuf - deregister and free registered buffer
1428 * @rb: regbuf to be deregistered and freed
1429 */
1430void
1431rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
1432{ 1464{
1433 rpcrdma_dma_unmap_regbuf(rb); 1465 rpcrdma_regbuf_dma_unmap(rb);
1466 if (rb)
1467 kfree(rb->rg_data);
1434 kfree(rb); 1468 kfree(rb);
1435} 1469}
1436 1470
@@ -1497,17 +1531,15 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1497 list_del(&rep->rr_list); 1531 list_del(&rep->rr_list);
1498 spin_unlock(&buf->rb_lock); 1532 spin_unlock(&buf->rb_lock);
1499 if (!rep) { 1533 if (!rep) {
1500 if (rpcrdma_create_rep(r_xprt, temp)) 1534 if (!rpcrdma_rep_create(r_xprt, temp))
1501 break; 1535 break;
1502 continue; 1536 continue;
1503 } 1537 }
1504 1538
1505 rb = rep->rr_rdmabuf; 1539 rb = rep->rr_rdmabuf;
1506 if (!rpcrdma_regbuf_is_mapped(rb)) { 1540 if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
1507 if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { 1541 rpcrdma_recv_buffer_put(rep);
1508 rpcrdma_recv_buffer_put(rep); 1542 break;
1509 break;
1510 }
1511 } 1543 }
1512 1544
1513 trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); 1545 trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 10f6593e1a6a..d1e0749bcbc4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,20 +66,17 @@
66 * Interface Adapter -- one per transport instance 66 * Interface Adapter -- one per transport instance
67 */ 67 */
68struct rpcrdma_ia { 68struct rpcrdma_ia {
69 struct ib_device *ri_device;
70 struct rdma_cm_id *ri_id; 69 struct rdma_cm_id *ri_id;
71 struct ib_pd *ri_pd; 70 struct ib_pd *ri_pd;
72 struct completion ri_done;
73 struct completion ri_remove_done;
74 int ri_async_rc; 71 int ri_async_rc;
75 unsigned int ri_max_segs; 72 unsigned int ri_max_segs;
76 unsigned int ri_max_frwr_depth; 73 unsigned int ri_max_frwr_depth;
77 unsigned int ri_max_inline_write;
78 unsigned int ri_max_inline_read;
79 unsigned int ri_max_send_sges; 74 unsigned int ri_max_send_sges;
80 bool ri_implicit_roundup; 75 bool ri_implicit_roundup;
81 enum ib_mr_type ri_mrtype; 76 enum ib_mr_type ri_mrtype;
82 unsigned long ri_flags; 77 unsigned long ri_flags;
78 struct completion ri_done;
79 struct completion ri_remove_done;
83}; 80};
84 81
85enum { 82enum {
@@ -93,22 +90,29 @@ enum {
93struct rpcrdma_ep { 90struct rpcrdma_ep {
94 unsigned int rep_send_count; 91 unsigned int rep_send_count;
95 unsigned int rep_send_batch; 92 unsigned int rep_send_batch;
93 unsigned int rep_max_inline_send;
94 unsigned int rep_max_inline_recv;
96 int rep_connected; 95 int rep_connected;
97 struct ib_qp_init_attr rep_attr; 96 struct ib_qp_init_attr rep_attr;
98 wait_queue_head_t rep_connect_wait; 97 wait_queue_head_t rep_connect_wait;
99 struct rpcrdma_connect_private rep_cm_private; 98 struct rpcrdma_connect_private rep_cm_private;
100 struct rdma_conn_param rep_remote_cma; 99 struct rdma_conn_param rep_remote_cma;
100 unsigned int rep_max_requests; /* set by /proc */
101 unsigned int rep_inline_send; /* negotiated */
102 unsigned int rep_inline_recv; /* negotiated */
101 int rep_receive_count; 103 int rep_receive_count;
102}; 104};
103 105
104/* Pre-allocate extra Work Requests for handling backward receives 106/* Pre-allocate extra Work Requests for handling backward receives
105 * and sends. This is a fixed value because the Work Queues are 107 * and sends. This is a fixed value because the Work Queues are
106 * allocated when the forward channel is set up. 108 * allocated when the forward channel is set up, long before the
109 * backchannel is provisioned. This value is two times
110 * NFS4_DEF_CB_SLOT_TABLE_SIZE.
107 */ 111 */
108#if defined(CONFIG_SUNRPC_BACKCHANNEL) 112#if defined(CONFIG_SUNRPC_BACKCHANNEL)
109#define RPCRDMA_BACKWARD_WRS (8) 113#define RPCRDMA_BACKWARD_WRS (32)
110#else 114#else
111#define RPCRDMA_BACKWARD_WRS (0) 115#define RPCRDMA_BACKWARD_WRS (0)
112#endif 116#endif
113 117
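[Annotation] The jump from 8 to 32 is just the arithmetic in the new comment: each backchannel slot needs a Send and a Receive Work Request, and NFS4_DEF_CB_SLOT_TABLE_SIZE is 16 in fs/nfs/callback.h, so 2 * 16 = 32. Spelled out as the derivation the comment implies (the explicit formula below is illustrative; the header keeps the literal 32):

	/* fs/nfs/callback.h */
	#define NFS4_DEF_CB_SLOT_TABLE_SIZE (16)

	/* one Send WR plus one Receive WR per backchannel slot */
	#define RPCRDMA_BACKWARD_WRS (2 * NFS4_DEF_CB_SLOT_TABLE_SIZE)	/* = 32 */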
114/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV 118/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
@@ -121,33 +125,34 @@ struct rpcrdma_regbuf {
121 struct ib_sge rg_iov; 125 struct ib_sge rg_iov;
122 struct ib_device *rg_device; 126 struct ib_device *rg_device;
123 enum dma_data_direction rg_direction; 127 enum dma_data_direction rg_direction;
124 __be32 rg_base[0] __attribute__ ((aligned(256))); 128 void *rg_data;
125}; 129};
126 130
127static inline u64 131static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb)
128rdmab_addr(struct rpcrdma_regbuf *rb)
129{ 132{
130 return rb->rg_iov.addr; 133 return rb->rg_iov.addr;
131} 134}
132 135
133static inline u32 136static inline u32 rdmab_length(struct rpcrdma_regbuf *rb)
134rdmab_length(struct rpcrdma_regbuf *rb)
135{ 137{
136 return rb->rg_iov.length; 138 return rb->rg_iov.length;
137} 139}
138 140
139static inline u32 141static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb)
140rdmab_lkey(struct rpcrdma_regbuf *rb)
141{ 142{
142 return rb->rg_iov.lkey; 143 return rb->rg_iov.lkey;
143} 144}
144 145
145static inline struct ib_device * 146static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb)
146rdmab_device(struct rpcrdma_regbuf *rb)
147{ 147{
148 return rb->rg_device; 148 return rb->rg_device;
149} 149}
150 150
151static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
152{
153 return rb->rg_data;
154}
155
151#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) 156#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
152 157
153/* To ensure a transport can always make forward progress, 158/* To ensure a transport can always make forward progress,
@@ -222,34 +227,18 @@ struct rpcrdma_xprt;
222struct rpcrdma_sendctx { 227struct rpcrdma_sendctx {
223 struct ib_send_wr sc_wr; 228 struct ib_send_wr sc_wr;
224 struct ib_cqe sc_cqe; 229 struct ib_cqe sc_cqe;
230 struct ib_device *sc_device;
225 struct rpcrdma_xprt *sc_xprt; 231 struct rpcrdma_xprt *sc_xprt;
226 struct rpcrdma_req *sc_req; 232 struct rpcrdma_req *sc_req;
227 unsigned int sc_unmap_count; 233 unsigned int sc_unmap_count;
228 struct ib_sge sc_sges[]; 234 struct ib_sge sc_sges[];
229}; 235};
230 236
231/* Limit the number of SGEs that can be unmapped during one
232 * Send completion. This caps the amount of work a single
233 * completion can do before returning to the provider.
234 *
235 * Setting this to zero disables Send completion batching.
236 */
237enum {
238 RPCRDMA_MAX_SEND_BATCH = 7,
239};
240
241/* 237/*
242 * struct rpcrdma_mr - external memory region metadata 238 * struct rpcrdma_mr - external memory region metadata
243 * 239 *
244 * An external memory region is any buffer or page that is registered 240 * An external memory region is any buffer or page that is registered
245 * on the fly (ie, not pre-registered). 241 * on the fly (ie, not pre-registered).
246 *
247 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
248 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
249 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
250 * track of registration metadata while each RPC is pending.
251 * rpcrdma_deregister_external() uses this metadata to unmap and
252 * release these resources when an RPC is complete.
253 */ 242 */
254enum rpcrdma_frwr_state { 243enum rpcrdma_frwr_state {
255 FRWR_IS_INVALID, /* ready to be used */ 244 FRWR_IS_INVALID, /* ready to be used */
@@ -419,20 +408,6 @@ enum {
419}; 408};
420 409
421/* 410/*
422 * Internal structure for transport instance creation. This
423 * exists primarily for modularity.
424 *
425 * This data should be set with mount options
426 */
427struct rpcrdma_create_data_internal {
428 unsigned int max_requests; /* max requests (slots) in flight */
429 unsigned int rsize; /* mount rsize - max read hdr+data */
430 unsigned int wsize; /* mount wsize - max write hdr+data */
431 unsigned int inline_rsize; /* max non-rdma read data payload */
432 unsigned int inline_wsize; /* max non-rdma write data payload */
433};
434
435/*
436 * Statistics for RPCRDMA 411 * Statistics for RPCRDMA
437 */ 412 */
438struct rpcrdma_stats { 413struct rpcrdma_stats {
@@ -476,13 +451,11 @@ struct rpcrdma_xprt {
476 struct rpcrdma_ia rx_ia; 451 struct rpcrdma_ia rx_ia;
477 struct rpcrdma_ep rx_ep; 452 struct rpcrdma_ep rx_ep;
478 struct rpcrdma_buffer rx_buf; 453 struct rpcrdma_buffer rx_buf;
479 struct rpcrdma_create_data_internal rx_data;
480 struct delayed_work rx_connect_worker; 454 struct delayed_work rx_connect_worker;
481 struct rpcrdma_stats rx_stats; 455 struct rpcrdma_stats rx_stats;
482}; 456};
483 457
484#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) 458#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
485#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
486 459
487static inline const char * 460static inline const char *
488rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) 461rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
@@ -516,9 +489,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
516/* 489/*
517 * Endpoint calls - xprtrdma/verbs.c 490 * Endpoint calls - xprtrdma/verbs.c
518 */ 491 */
519int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, 492int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt);
520 struct rpcrdma_create_data_internal *); 493void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt);
521void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
522int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); 494int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
523void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); 495void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
524 496
@@ -528,11 +500,12 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
528/* 500/*
529 * Buffer calls - xprtrdma/verbs.c 501 * Buffer calls - xprtrdma/verbs.c
530 */ 502 */
531struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 503struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
504 gfp_t flags);
532void rpcrdma_req_destroy(struct rpcrdma_req *req); 505void rpcrdma_req_destroy(struct rpcrdma_req *req);
533int rpcrdma_buffer_create(struct rpcrdma_xprt *); 506int rpcrdma_buffer_create(struct rpcrdma_xprt *);
534void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 507void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
535struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 508struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
536 509
537struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); 510struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
538void rpcrdma_mr_put(struct rpcrdma_mr *mr); 511void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@@ -548,23 +521,34 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
 void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
-			gfp_t);
-bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
-void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
+			    gfp_t flags);
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+			      struct rpcrdma_regbuf *rb);
 
-static inline bool
-rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped
+ *
+ * Returns true if the buffer is now mapped to rb->rg_device.
+ */
+static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
 {
 	return rb->rg_device != NULL;
 }
 
-static inline bool
-rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
+ * @rb: regbuf to be mapped
+ *
+ * Returns true if the buffer is currently DMA mapped.
+ */
+static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+					  struct rpcrdma_regbuf *rb)
 {
 	if (likely(rpcrdma_regbuf_is_mapped(rb)))
 		return true;
-	return __rpcrdma_dma_map_regbuf(ia, rb);
+	return __rpcrdma_regbuf_dma_map(r_xprt, rb);
 }
 
 /*
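The regbuf helpers now key off the transport rather than the ia, and reallocation composes with lazy DMA mapping. A hedged sketch of a resize path, assuming realloc hands back an unmapped buffer that rpcrdma_regbuf_dma_map() maps on first use (the helper name and GFP_NOFS choice are illustrative):

/* Grow a registered buffer, then make sure it is DMA mapped
 * again before it is posted.
 */
static bool example_resize_regbuf(struct rpcrdma_xprt *r_xprt,
				  struct rpcrdma_regbuf *rb, size_t size)
{
	if (!rpcrdma_regbuf_realloc(rb, size, GFP_NOFS))
		return false;
	return rpcrdma_regbuf_dma_map(r_xprt, rb);
}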
@@ -579,9 +563,8 @@ rpcrdma_data_dir(bool writing)
 
 /* Memory registration calls xprtrdma/frwr_ops.c
  */
-bool frwr_is_supported(struct rpcrdma_ia *);
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
-	      struct rpcrdma_create_data_internal *cdata);
+bool frwr_is_supported(struct ib_device *device);
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
 void frwr_release_mr(struct rpcrdma_mr *mr);
 size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
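frwr_is_supported() now needs only an ib_device, so the FRWR capability check can be made as soon as a device is resolved, before any ia state exists. A sketch (the probe function is hypothetical; the id->device member follows standard rdma_cm usage):

#include <rdma/rdma_cm.h>

/* Reject a device that cannot do FRWR before building any state. */
static int example_check_device(struct rdma_cm_id *id)
{
	if (!frwr_is_supported(id->device))
		return -EINVAL;
	return 0;
}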
@@ -610,7 +593,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
 			      struct rpcrdma_req *req, u32 hdrlen,
 			      struct xdr_buf *xdr,
 			      enum rpcrdma_chunktype rtype);
-void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
 int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
@@ -627,7 +610,9 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
+extern unsigned int xprt_rdma_slot_table_entries;
 extern unsigned int xprt_rdma_max_inline_read;
+extern unsigned int xprt_rdma_max_inline_write;
 void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
 void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
 void xprt_rdma_close(struct rpc_xprt *xprt);
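Exporting these tunables lets other xprtrdma source files consult the module parameters directly instead of keeping per-transport copies. An illustrative consumer; the clamping policy here is an assumption, not part of this patch:

/* Clamp a requested credit count to the module-wide slot limit. */
static unsigned int example_clamp_credits(unsigned int requested)
{
	return min(requested, xprt_rdma_slot_table_entries);
}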
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 732d4b57411a..c69951ed2ebc 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2017,6 +2017,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 		 * we'll need to figure out how to pass a namespace to
 		 * connect.
 		 */
+		task->tk_rpc_status = -ENOTCONN;
 		rpc_exit(task, -ENOTCONN);
 		return;
 	}
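Recording the errno in tk_rpc_status before rpc_exit() lets later waiters see why the task was terminated, distinguishing a local abort from a server-side error. A minimal sketch of the same pattern (the helper name is hypothetical):

/* Record why the task is being torn down, then exit it. */
static void example_abort_task(struct rpc_task *task, int error)
{
	task->tk_rpc_status = error;
	rpc_exit(task, error);
}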
@@ -2690,7 +2691,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
 	.buf_free		= rpc_free,
 	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_local_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= xs_close,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_local_print_stats,
@@ -2710,7 +2711,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
 	.send_request		= xs_udp_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_rtt,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_rtt,
 	.timer			= xs_udp_timer,
 	.release_request	= xprt_release_rqst_cong,
 	.close			= xs_close,
@@ -2733,7 +2734,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
 	.buf_free		= rpc_free,
 	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_tcp_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= xs_tcp_shutdown,
 	.destroy		= xs_destroy,
 	.set_connect_timeout	= xs_tcp_set_connect_timeout,
@@ -2761,7 +2762,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
 	.buf_alloc		= bc_malloc,
 	.buf_free		= bc_free,
 	.send_request		= bc_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= bc_close,
 	.destroy		= bc_destroy,
 	.print_stats		= xs_tcp_print_stats,
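All four ops tables switch from set_retrans_timeout to wait_for_reply_request, a name that better describes what the op does: it parks the task waiting for the reply, using either a fixed timeout (_def) or an RTT estimate (_rtt, used only by UDP above). An illustrative fragment of how a new transport would wire the op; other members are elided for brevity, so this is a sketch, not a usable ops table:

/* Stream-style transports take the fixed-timeout variant, as the
 * xs_local/xs_tcp/bc_tcp tables above do; only UDP uses _rtt.
 */
static const struct rpc_xprt_ops example_ops = {
	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
};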