aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/Kconfig22
-rw-r--r--net/sunrpc/cache.c4
-rw-r--r--net/sunrpc/clnt.c48
-rw-r--r--net/sunrpc/rpcb_clnt.c103
-rw-r--r--net/sunrpc/stats.c10
-rw-r--r--net/sunrpc/svc.c160
-rw-r--r--net/sunrpc/svc_xprt.c31
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/sunrpc/xprt.c89
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c8
-rw-r--r--net/sunrpc/xprtsock.c363
12 files changed, 508 insertions, 396 deletions
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 5592883e1e4a..afd91c78ce8e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
17 17
18 If unsure, say N. 18 If unsure, say N.
19 19
20config SUNRPC_REGISTER_V4
21 bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
22 depends on SUNRPC && EXPERIMENTAL
23 default n
24 help
25 Sun added support for registering RPC services at an IPv6
26 address by creating two new versions of the rpcbind protocol
27 (RFC 1833).
28
29 This option enables support in the kernel RPC server for
30 registering kernel RPC services via version 4 of the rpcbind
31 protocol. If you enable this option, you must run a portmapper
32 daemon that supports rpcbind protocol version 4.
33
34 Serving NFS over IPv6 from knfsd (the kernel's NFS server)
35 requires that you enable this option and use a portmapper that
36 supports rpcbind version 4.
37
38 If unsure, say N to get traditional behavior (register kernel
39 RPC services using only rpcbind version 2). Distributions
40 using the legacy Linux portmapper daemon must say N here.
41
42config RPCSEC_GSS_KRB5 20config RPCSEC_GSS_KRB5
43 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" 21 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
44 depends on SUNRPC && EXPERIMENTAL 22 depends on SUNRPC && EXPERIMENTAL
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4735caad26ed..20029a79a5de 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -313,7 +313,6 @@ static int create_cache_proc_entries(struct cache_detail *cd)
313 cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); 313 cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
314 if (cd->proc_ent == NULL) 314 if (cd->proc_ent == NULL)
315 goto out_nomem; 315 goto out_nomem;
316 cd->proc_ent->owner = cd->owner;
317 cd->channel_ent = cd->content_ent = NULL; 316 cd->channel_ent = cd->content_ent = NULL;
318 317
319 p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, 318 p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
@@ -321,7 +320,6 @@ static int create_cache_proc_entries(struct cache_detail *cd)
321 cd->flush_ent = p; 320 cd->flush_ent = p;
322 if (p == NULL) 321 if (p == NULL)
323 goto out_nomem; 322 goto out_nomem;
324 p->owner = cd->owner;
325 323
326 if (cd->cache_request || cd->cache_parse) { 324 if (cd->cache_request || cd->cache_parse) {
327 p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, 325 p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
@@ -329,7 +327,6 @@ static int create_cache_proc_entries(struct cache_detail *cd)
329 cd->channel_ent = p; 327 cd->channel_ent = p;
330 if (p == NULL) 328 if (p == NULL)
331 goto out_nomem; 329 goto out_nomem;
332 p->owner = cd->owner;
333 } 330 }
334 if (cd->cache_show) { 331 if (cd->cache_show) {
335 p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, 332 p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR,
@@ -337,7 +334,6 @@ static int create_cache_proc_entries(struct cache_detail *cd)
337 cd->content_ent = p; 334 cd->content_ent = p;
338 if (p == NULL) 335 if (p == NULL)
339 goto out_nomem; 336 goto out_nomem;
340 p->owner = cd->owner;
341 } 337 }
342 return 0; 338 return 0;
343out_nomem: 339out_nomem:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 836f15c0c4a3..5abab094441f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
1032 dprint_status(task); 1032 dprint_status(task);
1033 1033
1034 task->tk_status = 0; 1034 task->tk_status = 0;
1035 if (status >= 0) { 1035 if (status >= 0 || status == -EAGAIN) {
1036 clnt->cl_stats->netreconn++; 1036 clnt->cl_stats->netreconn++;
1037 task->tk_action = call_transmit; 1037 task->tk_action = call_transmit;
1038 return; 1038 return;
1039 } 1039 }
1040 1040
1041 /* Something failed: remote service port may have changed */
1042 rpc_force_rebind(clnt);
1043
1044 switch (status) { 1041 switch (status) {
1045 case -ENOTCONN:
1046 case -EAGAIN:
1047 task->tk_action = call_bind;
1048 if (!RPC_IS_SOFT(task))
1049 return;
1050 /* if soft mounted, test if we've timed out */ 1042 /* if soft mounted, test if we've timed out */
1051 case -ETIMEDOUT: 1043 case -ETIMEDOUT:
1052 task->tk_action = call_timeout; 1044 task->tk_action = call_timeout;
1053 return; 1045 break;
1046 default:
1047 rpc_exit(task, -EIO);
1054 } 1048 }
1055 rpc_exit(task, -EIO);
1056} 1049}
1057 1050
1058/* 1051/*
@@ -1105,14 +1098,26 @@ static void
1105call_transmit_status(struct rpc_task *task) 1098call_transmit_status(struct rpc_task *task)
1106{ 1099{
1107 task->tk_action = call_status; 1100 task->tk_action = call_status;
1108 /* 1101 switch (task->tk_status) {
1109 * Special case: if we've been waiting on the socket's write_space() 1102 case -EAGAIN:
1110 * callback, then don't call xprt_end_transmit(). 1103 break;
1111 */ 1104 default:
1112 if (task->tk_status == -EAGAIN) 1105 xprt_end_transmit(task);
1113 return; 1106 /*
1114 xprt_end_transmit(task); 1107 * Special cases: if we've been waiting on the
1115 rpc_task_force_reencode(task); 1108 * socket's write_space() callback, or if the
1109 * socket just returned a connection error,
1110 * then hold onto the transport lock.
1111 */
1112 case -ECONNREFUSED:
1113 case -ECONNRESET:
1114 case -ENOTCONN:
1115 case -EHOSTDOWN:
1116 case -EHOSTUNREACH:
1117 case -ENETUNREACH:
1118 case -EPIPE:
1119 rpc_task_force_reencode(task);
1120 }
1116} 1121}
1117 1122
1118/* 1123/*
@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
1152 xprt_conditional_disconnect(task->tk_xprt, 1157 xprt_conditional_disconnect(task->tk_xprt,
1153 req->rq_connect_cookie); 1158 req->rq_connect_cookie);
1154 break; 1159 break;
1160 case -ECONNRESET:
1155 case -ECONNREFUSED: 1161 case -ECONNREFUSED:
1156 case -ENOTCONN:
1157 rpc_force_rebind(clnt); 1162 rpc_force_rebind(clnt);
1163 rpc_delay(task, 3*HZ);
1164 case -EPIPE:
1165 case -ENOTCONN:
1158 task->tk_action = call_bind; 1166 task->tk_action = call_bind;
1159 break; 1167 break;
1160 case -EAGAIN: 1168 case -EAGAIN:
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 03ae007641e4..beee6da33035 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -63,9 +63,16 @@ enum {
63 * r_owner 63 * r_owner
64 * 64 *
65 * The "owner" is allowed to unset a service in the rpcbind database. 65 * The "owner" is allowed to unset a service in the rpcbind database.
66 * We always use the following (arbitrary) fixed string. 66 *
67 * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
68 * UID which it maps to a local user name via a password lookup.
69 * In all other cases it is ignored.
70 *
71 * For SET/UNSET requests, user space provides a value, even for
72 * network requests, and GETADDR uses an empty string. We follow
73 * those precedents here.
67 */ 74 */
68#define RPCB_OWNER_STRING "rpcb" 75#define RPCB_OWNER_STRING "0"
69#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) 76#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
70 77
71static void rpcb_getport_done(struct rpc_task *, void *); 78static void rpcb_getport_done(struct rpc_task *, void *);
@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
124 .sin_port = htons(RPCBIND_PORT), 131 .sin_port = htons(RPCBIND_PORT),
125}; 132};
126 133
127static const struct sockaddr_in6 rpcb_in6addr_loopback = {
128 .sin6_family = AF_INET6,
129 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
130 .sin6_port = htons(RPCBIND_PORT),
131};
132
133static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, 134static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
134 size_t addrlen, u32 version) 135 size_t addrlen, u32 version)
135{ 136{
@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
176 return rpc_create(&args); 177 return rpc_create(&args);
177} 178}
178 179
179static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, 180static int rpcb_register_call(const u32 version, struct rpc_message *msg)
180 u32 version, struct rpc_message *msg)
181{ 181{
182 struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
183 size_t addrlen = sizeof(rpcb_inaddr_loopback);
182 struct rpc_clnt *rpcb_clnt; 184 struct rpc_clnt *rpcb_clnt;
183 int result, error = 0; 185 int result, error = 0;
184 186
@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
192 error = PTR_ERR(rpcb_clnt); 194 error = PTR_ERR(rpcb_clnt);
193 195
194 if (error < 0) { 196 if (error < 0) {
195 printk(KERN_WARNING "RPC: failed to contact local rpcbind " 197 dprintk("RPC: failed to contact local rpcbind "
196 "server (errno %d).\n", -error); 198 "server (errno %d).\n", -error);
197 return error; 199 return error;
198 } 200 }
@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
254 if (port) 256 if (port)
255 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; 257 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
256 258
257 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 259 return rpcb_register_call(RPCBVERS_2, &msg);
258 sizeof(rpcb_inaddr_loopback),
259 RPCBVERS_2, &msg);
260} 260}
261 261
262/* 262/*
263 * Fill in AF_INET family-specific arguments to register 263 * Fill in AF_INET family-specific arguments to register
264 */ 264 */
265static int rpcb_register_netid4(struct sockaddr_in *address_to_register, 265static int rpcb_register_inet4(const struct sockaddr *sap,
266 struct rpc_message *msg) 266 struct rpc_message *msg)
267{ 267{
268 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
268 struct rpcbind_args *map = msg->rpc_argp; 269 struct rpcbind_args *map = msg->rpc_argp;
269 unsigned short port = ntohs(address_to_register->sin_port); 270 unsigned short port = ntohs(sin->sin_port);
270 char buf[32]; 271 char buf[32];
271 272
272 /* Construct AF_INET universal address */ 273 /* Construct AF_INET universal address */
273 snprintf(buf, sizeof(buf), "%pI4.%u.%u", 274 snprintf(buf, sizeof(buf), "%pI4.%u.%u",
274 &address_to_register->sin_addr.s_addr, 275 &sin->sin_addr.s_addr, port >> 8, port & 0xff);
275 port >> 8, port & 0xff);
276 map->r_addr = buf; 276 map->r_addr = buf;
277 277
278 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 278 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
284 if (port) 284 if (port)
285 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 285 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
286 286
287 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 287 return rpcb_register_call(RPCBVERS_4, msg);
288 sizeof(rpcb_inaddr_loopback),
289 RPCBVERS_4, msg);
290} 288}
291 289
292/* 290/*
293 * Fill in AF_INET6 family-specific arguments to register 291 * Fill in AF_INET6 family-specific arguments to register
294 */ 292 */
295static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, 293static int rpcb_register_inet6(const struct sockaddr *sap,
296 struct rpc_message *msg) 294 struct rpc_message *msg)
297{ 295{
296 const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
298 struct rpcbind_args *map = msg->rpc_argp; 297 struct rpcbind_args *map = msg->rpc_argp;
299 unsigned short port = ntohs(address_to_register->sin6_port); 298 unsigned short port = ntohs(sin6->sin6_port);
300 char buf[64]; 299 char buf[64];
301 300
302 /* Construct AF_INET6 universal address */ 301 /* Construct AF_INET6 universal address */
303 if (ipv6_addr_any(&address_to_register->sin6_addr)) 302 if (ipv6_addr_any(&sin6->sin6_addr))
304 snprintf(buf, sizeof(buf), "::.%u.%u", 303 snprintf(buf, sizeof(buf), "::.%u.%u",
305 port >> 8, port & 0xff); 304 port >> 8, port & 0xff);
306 else 305 else
307 snprintf(buf, sizeof(buf), "%pI6.%u.%u", 306 snprintf(buf, sizeof(buf), "%pI6.%u.%u",
308 &address_to_register->sin6_addr, 307 &sin6->sin6_addr, port >> 8, port & 0xff);
309 port >> 8, port & 0xff);
310 map->r_addr = buf; 308 map->r_addr = buf;
311 309
312 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 310 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
318 if (port) 316 if (port)
319 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 317 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
320 318
321 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, 319 return rpcb_register_call(RPCBVERS_4, msg);
322 sizeof(rpcb_in6addr_loopback), 320}
323 RPCBVERS_4, msg); 321
322static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
323{
324 struct rpcbind_args *map = msg->rpc_argp;
325
326 dprintk("RPC: unregistering [%u, %u, '%s'] with "
327 "local rpcbind\n",
328 map->r_prog, map->r_vers, map->r_netid);
329
330 map->r_addr = "";
331 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
332
333 return rpcb_register_call(RPCBVERS_4, msg);
324} 334}
325 335
326/** 336/**
@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
340 * invoke this function once for each [program, version, address, 350 * invoke this function once for each [program, version, address,
341 * netid] tuple they wish to advertise. 351 * netid] tuple they wish to advertise.
342 * 352 *
343 * Callers may also unregister RPC services that are no longer 353 * Callers may also unregister RPC services that are registered at a
344 * available by setting the port number in the passed-in address 354 * specific address by setting the port number in @address to zero.
345 * to zero. Callers pass a netid of "" to unregister all 355 * They may unregister all registered protocol families at once for
346 * transport netids associated with [program, version, address]. 356 * a service by passing a NULL @address argument. If @netid is ""
357 * then all netids for [program, version, address] are unregistered.
347 * 358 *
348 * This function uses rpcbind protocol version 4 to contact the 359 * This function uses rpcbind protocol version 4 to contact the
349 * local rpcbind daemon. The local rpcbind daemon must support 360 * local rpcbind daemon. The local rpcbind daemon must support
@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
378 .rpc_argp = &map, 389 .rpc_argp = &map,
379 }; 390 };
380 391
392 if (address == NULL)
393 return rpcb_unregister_all_protofamilies(&msg);
394
381 switch (address->sa_family) { 395 switch (address->sa_family) {
382 case AF_INET: 396 case AF_INET:
383 return rpcb_register_netid4((struct sockaddr_in *)address, 397 return rpcb_register_inet4(address, &msg);
384 &msg);
385 case AF_INET6: 398 case AF_INET6:
386 return rpcb_register_netid6((struct sockaddr_in6 *)address, 399 return rpcb_register_inet6(address, &msg);
387 &msg);
388 } 400 }
389 401
390 return -EAFNOSUPPORT; 402 return -EAFNOSUPPORT;
@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
579 map->r_xprt = xprt_get(xprt); 591 map->r_xprt = xprt_get(xprt);
580 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); 592 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
581 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); 593 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
582 map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ 594 map->r_owner = "";
583 map->r_status = -EIO; 595 map->r_status = -EIO;
584 596
585 child = rpcb_call_async(rpcb_clnt, map, proc); 597 child = rpcb_call_async(rpcb_clnt, map, proc);
@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
703 *portp = 0; 715 *portp = 0;
704 addr_len = ntohl(*p++); 716 addr_len = ntohl(*p++);
705 717
718 if (addr_len == 0) {
719 dprintk("RPC: rpcb_decode_getaddr: "
720 "service is not registered\n");
721 return 0;
722 }
723
706 /* 724 /*
707 * Simple sanity check. The smallest possible universal 725 * Simple sanity check.
708 * address is an IPv4 address string containing 11 bytes.
709 */ 726 */
710 if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) 727 if (addr_len > RPCBIND_MAXUADDRLEN)
711 goto out_err; 728 goto out_err;
712 729
713 /* 730 /*
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 085372ef4feb..1ef6e46d9da2 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -262,14 +262,8 @@ void
262rpc_proc_init(void) 262rpc_proc_init(void)
263{ 263{
264 dprintk("RPC: registering /proc/net/rpc\n"); 264 dprintk("RPC: registering /proc/net/rpc\n");
265 if (!proc_net_rpc) { 265 if (!proc_net_rpc)
266 struct proc_dir_entry *ent; 266 proc_net_rpc = proc_mkdir("rpc", init_net.proc_net);
267 ent = proc_mkdir("rpc", init_net.proc_net);
268 if (ent) {
269 ent->owner = THIS_MODULE;
270 proc_net_rpc = ent;
271 }
272 }
273} 267}
274 268
275void 269void
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index c51fed4d1af1..9f2f2412a2f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -312,7 +312,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
312 switch (m->mode) { 312 switch (m->mode) {
313 case SVC_POOL_PERCPU: 313 case SVC_POOL_PERCPU:
314 { 314 {
315 set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); 315 set_cpus_allowed_ptr(task, cpumask_of(node));
316 break; 316 break;
317 } 317 }
318 case SVC_POOL_PERNODE: 318 case SVC_POOL_PERNODE:
@@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
359 */ 359 */
360static struct svc_serv * 360static struct svc_serv *
361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
362 sa_family_t family, void (*shutdown)(struct svc_serv *serv)) 362 void (*shutdown)(struct svc_serv *serv))
363{ 363{
364 struct svc_serv *serv; 364 struct svc_serv *serv;
365 unsigned int vers; 365 unsigned int vers;
@@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
368 368
369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
370 return NULL; 370 return NULL;
371 serv->sv_family = family;
372 serv->sv_name = prog->pg_name; 371 serv->sv_name = prog->pg_name;
373 serv->sv_program = prog; 372 serv->sv_program = prog;
374 serv->sv_nrthreads = 1; 373 serv->sv_nrthreads = 1;
@@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
427 426
428struct svc_serv * 427struct svc_serv *
429svc_create(struct svc_program *prog, unsigned int bufsize, 428svc_create(struct svc_program *prog, unsigned int bufsize,
430 sa_family_t family, void (*shutdown)(struct svc_serv *serv)) 429 void (*shutdown)(struct svc_serv *serv))
431{ 430{
432 return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); 431 return __svc_create(prog, bufsize, /*npools*/1, shutdown);
433} 432}
434EXPORT_SYMBOL_GPL(svc_create); 433EXPORT_SYMBOL_GPL(svc_create);
435 434
436struct svc_serv * 435struct svc_serv *
437svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 436svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
438 sa_family_t family, void (*shutdown)(struct svc_serv *serv), 437 void (*shutdown)(struct svc_serv *serv),
439 svc_thread_fn func, struct module *mod) 438 svc_thread_fn func, struct module *mod)
440{ 439{
441 struct svc_serv *serv; 440 struct svc_serv *serv;
442 unsigned int npools = svc_pool_map_get(); 441 unsigned int npools = svc_pool_map_get();
443 442
444 serv = __svc_create(prog, bufsize, npools, family, shutdown); 443 serv = __svc_create(prog, bufsize, npools, shutdown);
445 444
446 if (serv != NULL) { 445 if (serv != NULL) {
447 serv->sv_function = func; 446 serv->sv_function = func;
@@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
719} 718}
720EXPORT_SYMBOL_GPL(svc_exit_thread); 719EXPORT_SYMBOL_GPL(svc_exit_thread);
721 720
722#ifdef CONFIG_SUNRPC_REGISTER_V4
723
724/* 721/*
725 * Register an "inet" protocol family netid with the local 722 * Register an "inet" protocol family netid with the local
726 * rpcbind daemon via an rpcbind v4 SET request. 723 * rpcbind daemon via an rpcbind v4 SET request.
@@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
735 const unsigned short protocol, 732 const unsigned short protocol,
736 const unsigned short port) 733 const unsigned short port)
737{ 734{
738 struct sockaddr_in sin = { 735 const struct sockaddr_in sin = {
739 .sin_family = AF_INET, 736 .sin_family = AF_INET,
740 .sin_addr.s_addr = htonl(INADDR_ANY), 737 .sin_addr.s_addr = htonl(INADDR_ANY),
741 .sin_port = htons(port), 738 .sin_port = htons(port),
742 }; 739 };
743 char *netid; 740 const char *netid;
741 int error;
744 742
745 switch (protocol) { 743 switch (protocol) {
746 case IPPROTO_UDP: 744 case IPPROTO_UDP:
@@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
750 netid = RPCBIND_NETID_TCP; 748 netid = RPCBIND_NETID_TCP;
751 break; 749 break;
752 default: 750 default:
753 return -EPROTONOSUPPORT; 751 return -ENOPROTOOPT;
754 } 752 }
755 753
756 return rpcb_v4_register(program, version, 754 error = rpcb_v4_register(program, version,
757 (struct sockaddr *)&sin, netid); 755 (const struct sockaddr *)&sin, netid);
756
757 /*
758 * User space didn't support rpcbind v4, so retry this
759 * registration request with the legacy rpcbind v2 protocol.
760 */
761 if (error == -EPROTONOSUPPORT)
762 error = rpcb_register(program, version, protocol, port);
763
764 return error;
758} 765}
759 766
767#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
760/* 768/*
761 * Register an "inet6" protocol family netid with the local 769 * Register an "inet6" protocol family netid with the local
762 * rpcbind daemon via an rpcbind v4 SET request. 770 * rpcbind daemon via an rpcbind v4 SET request.
@@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
771 const unsigned short protocol, 779 const unsigned short protocol,
772 const unsigned short port) 780 const unsigned short port)
773{ 781{
774 struct sockaddr_in6 sin6 = { 782 const struct sockaddr_in6 sin6 = {
775 .sin6_family = AF_INET6, 783 .sin6_family = AF_INET6,
776 .sin6_addr = IN6ADDR_ANY_INIT, 784 .sin6_addr = IN6ADDR_ANY_INIT,
777 .sin6_port = htons(port), 785 .sin6_port = htons(port),
778 }; 786 };
779 char *netid; 787 const char *netid;
788 int error;
780 789
781 switch (protocol) { 790 switch (protocol) {
782 case IPPROTO_UDP: 791 case IPPROTO_UDP:
@@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
786 netid = RPCBIND_NETID_TCP6; 795 netid = RPCBIND_NETID_TCP6;
787 break; 796 break;
788 default: 797 default:
789 return -EPROTONOSUPPORT; 798 return -ENOPROTOOPT;
790 } 799 }
791 800
792 return rpcb_v4_register(program, version, 801 error = rpcb_v4_register(program, version,
793 (struct sockaddr *)&sin6, netid); 802 (const struct sockaddr *)&sin6, netid);
803
804 /*
805 * User space didn't support rpcbind version 4, so we won't
806 * use a PF_INET6 listener.
807 */
808 if (error == -EPROTONOSUPPORT)
809 error = -EAFNOSUPPORT;
810
811 return error;
794} 812}
813#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
795 814
796/* 815/*
797 * Register a kernel RPC service via rpcbind version 4. 816 * Register a kernel RPC service via rpcbind version 4.
@@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
799 * Returns zero on success; a negative errno value is returned 818 * Returns zero on success; a negative errno value is returned
800 * if any error occurs. 819 * if any error occurs.
801 */ 820 */
802static int __svc_register(const u32 program, const u32 version, 821static int __svc_register(const char *progname,
803 const sa_family_t family, 822 const u32 program, const u32 version,
823 const int family,
804 const unsigned short protocol, 824 const unsigned short protocol,
805 const unsigned short port) 825 const unsigned short port)
806{ 826{
807 int error; 827 int error = -EAFNOSUPPORT;
808 828
809 switch (family) { 829 switch (family) {
810 case AF_INET: 830 case PF_INET:
811 return __svc_rpcb_register4(program, version, 831 error = __svc_rpcb_register4(program, version,
812 protocol, port); 832 protocol, port);
813 case AF_INET6: 833 break;
834#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
835 case PF_INET6:
814 error = __svc_rpcb_register6(program, version, 836 error = __svc_rpcb_register6(program, version,
815 protocol, port); 837 protocol, port);
816 if (error < 0) 838#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
817 return error;
818
819 /*
820 * Work around bug in some versions of Linux rpcbind
821 * which don't allow registration of both inet and
822 * inet6 netids.
823 *
824 * Error return ignored for now.
825 */
826 __svc_rpcb_register4(program, version,
827 protocol, port);
828 return 0;
829 } 839 }
830 840
831 return -EAFNOSUPPORT; 841 if (error < 0)
832} 842 printk(KERN_WARNING "svc: failed to register %sv%u RPC "
833 843 "service (errno %d).\n", progname, version, -error);
834#else /* CONFIG_SUNRPC_REGISTER_V4 */ 844 return error;
835
836/*
837 * Register a kernel RPC service via rpcbind version 2.
838 *
839 * Returns zero on success; a negative errno value is returned
840 * if any error occurs.
841 */
842static int __svc_register(const u32 program, const u32 version,
843 sa_family_t family,
844 const unsigned short protocol,
845 const unsigned short port)
846{
847 if (family != AF_INET)
848 return -EAFNOSUPPORT;
849
850 return rpcb_register(program, version, protocol, port);
851} 845}
852 846
853#endif /* CONFIG_SUNRPC_REGISTER_V4 */
854
855/** 847/**
856 * svc_register - register an RPC service with the local portmapper 848 * svc_register - register an RPC service with the local portmapper
857 * @serv: svc_serv struct for the service to register 849 * @serv: svc_serv struct for the service to register
850 * @family: protocol family of service's listener socket
858 * @proto: transport protocol number to advertise 851 * @proto: transport protocol number to advertise
859 * @port: port to advertise 852 * @port: port to advertise
860 * 853 *
861 * Service is registered for any address in serv's address family 854 * Service is registered for any address in the passed-in protocol family
862 */ 855 */
863int svc_register(const struct svc_serv *serv, const unsigned short proto, 856int svc_register(const struct svc_serv *serv, const int family,
864 const unsigned short port) 857 const unsigned short proto, const unsigned short port)
865{ 858{
866 struct svc_program *progp; 859 struct svc_program *progp;
867 unsigned int i; 860 unsigned int i;
@@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
879 i, 872 i,
880 proto == IPPROTO_UDP? "udp" : "tcp", 873 proto == IPPROTO_UDP? "udp" : "tcp",
881 port, 874 port,
882 serv->sv_family, 875 family,
883 progp->pg_vers[i]->vs_hidden? 876 progp->pg_vers[i]->vs_hidden?
884 " (but not telling portmap)" : ""); 877 " (but not telling portmap)" : "");
885 878
886 if (progp->pg_vers[i]->vs_hidden) 879 if (progp->pg_vers[i]->vs_hidden)
887 continue; 880 continue;
888 881
889 error = __svc_register(progp->pg_prog, i, 882 error = __svc_register(progp->pg_name, progp->pg_prog,
890 serv->sv_family, proto, port); 883 i, family, proto, port);
891 if (error < 0) 884 if (error < 0)
892 break; 885 break;
893 } 886 }
@@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
896 return error; 889 return error;
897} 890}
898 891
899#ifdef CONFIG_SUNRPC_REGISTER_V4 892/*
900 893 * If user space is running rpcbind, it should take the v4 UNSET
894 * and clear everything for this [program, version]. If user space
895 * is running portmap, it will reject the v4 UNSET, but won't have
896 * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
897 * in this case to clear all existing entries for [program, version].
898 */
901static void __svc_unregister(const u32 program, const u32 version, 899static void __svc_unregister(const u32 program, const u32 version,
902 const char *progname) 900 const char *progname)
903{ 901{
904 struct sockaddr_in6 sin6 = {
905 .sin6_family = AF_INET6,
906 .sin6_addr = IN6ADDR_ANY_INIT,
907 .sin6_port = 0,
908 };
909 int error; 902 int error;
910 903
911 error = rpcb_v4_register(program, version, 904 error = rpcb_v4_register(program, version, NULL, "");
912 (struct sockaddr *)&sin6, "");
913 dprintk("svc: %s(%sv%u), error %d\n",
914 __func__, progname, version, error);
915}
916
917#else /* CONFIG_SUNRPC_REGISTER_V4 */
918 905
919static void __svc_unregister(const u32 program, const u32 version, 906 /*
920 const char *progname) 907 * User space didn't support rpcbind v4, so retry this
921{ 908 * request with the legacy rpcbind v2 protocol.
922 int error; 909 */
910 if (error == -EPROTONOSUPPORT)
911 error = rpcb_register(program, version, 0, 0);
923 912
924 error = rpcb_register(program, version, 0, 0);
925 dprintk("svc: %s(%sv%u), error %d\n", 913 dprintk("svc: %s(%sv%u), error %d\n",
926 __func__, progname, version, error); 914 __func__, progname, version, error);
927} 915}
928 916
929#endif /* CONFIG_SUNRPC_REGISTER_V4 */
930
931/* 917/*
932 * All netids, bind addresses and ports registered for [program, version] 918 * All netids, bind addresses and ports registered for [program, version]
933 * are removed from the local rpcbind database (if the service is not 919 * are removed from the local rpcbind database (if the service is not
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e588df5d6b34..2819ee093f36 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
161 161
162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
163 struct svc_serv *serv, 163 struct svc_serv *serv,
164 unsigned short port, int flags) 164 const int family,
165 const unsigned short port,
166 int flags)
165{ 167{
166 struct sockaddr_in sin = { 168 struct sockaddr_in sin = {
167 .sin_family = AF_INET, 169 .sin_family = AF_INET,
@@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
176 struct sockaddr *sap; 178 struct sockaddr *sap;
177 size_t len; 179 size_t len;
178 180
179 switch (serv->sv_family) { 181 switch (family) {
180 case AF_INET: 182 case PF_INET:
181 sap = (struct sockaddr *)&sin; 183 sap = (struct sockaddr *)&sin;
182 len = sizeof(sin); 184 len = sizeof(sin);
183 break; 185 break;
184 case AF_INET6: 186 case PF_INET6:
185 sap = (struct sockaddr *)&sin6; 187 sap = (struct sockaddr *)&sin6;
186 len = sizeof(sin6); 188 len = sizeof(sin6);
187 break; 189 break;
@@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
192 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 194 return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
193} 195}
194 196
195int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, 197int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
198 const int family, const unsigned short port,
196 int flags) 199 int flags)
197{ 200{
198 struct svc_xprt_class *xcl; 201 struct svc_xprt_class *xcl;
@@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
209 goto err; 212 goto err;
210 213
211 spin_unlock(&svc_xprt_class_lock); 214 spin_unlock(&svc_xprt_class_lock);
212 newxprt = __svc_xpo_create(xcl, serv, port, flags); 215 newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
213 if (IS_ERR(newxprt)) { 216 if (IS_ERR(newxprt)) {
214 module_put(xcl->xcl_owner); 217 module_put(xcl->xcl_owner);
215 return PTR_ERR(newxprt); 218 return PTR_ERR(newxprt);
@@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1033 return dr; 1036 return dr;
1034} 1037}
1035 1038
1036/* 1039/**
1040 * svc_find_xprt - find an RPC transport instance
1041 * @serv: pointer to svc_serv to search
1042 * @xcl_name: C string containing transport's class name
1043 * @af: Address family of transport's local address
1044 * @port: transport's IP port number
1045 *
1037 * Return the transport instance pointer for the endpoint accepting 1046 * Return the transport instance pointer for the endpoint accepting
1038 * connections/peer traffic from the specified transport class, 1047 * connections/peer traffic from the specified transport class,
1039 * address family and port. 1048 * address family and port.
@@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1042 * wild-card, and will result in matching the first transport in the 1051 * wild-card, and will result in matching the first transport in the
1043 * service's list that has a matching class name. 1052 * service's list that has a matching class name.
1044 */ 1053 */
1045struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, 1054struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
1046 int af, int port) 1055 const sa_family_t af, const unsigned short port)
1047{ 1056{
1048 struct svc_xprt *xprt; 1057 struct svc_xprt *xprt;
1049 struct svc_xprt *found = NULL; 1058 struct svc_xprt *found = NULL;
1050 1059
1051 /* Sanity check the args */ 1060 /* Sanity check the args */
1052 if (!serv || !xcl_name) 1061 if (serv == NULL || xcl_name == NULL)
1053 return found; 1062 return found;
1054 1063
1055 spin_lock_bh(&serv->sv_lock); 1064 spin_lock_bh(&serv->sv_lock);
@@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
1058 continue; 1067 continue;
1059 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) 1068 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
1060 continue; 1069 continue;
1061 if (port && port != svc_xprt_local_port(xprt)) 1070 if (port != 0 && port != svc_xprt_local_port(xprt))
1062 continue; 1071 continue;
1063 found = xprt; 1072 found = xprt;
1064 svc_xprt_get(xprt); 1073 svc_xprt_get(xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5763e6460fea..9d504234af4a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1110 struct svc_sock *svsk; 1110 struct svc_sock *svsk;
1111 struct sock *inet; 1111 struct sock *inet;
1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1113 int val;
1114 1113
1115 dprintk("svc: svc_setup_socket %p\n", sock); 1114 dprintk("svc: svc_setup_socket %p\n", sock);
1116 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1115 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1122 1121
1123 /* Register socket with portmapper */ 1122 /* Register socket with portmapper */
1124 if (*errp >= 0 && pmap_register) 1123 if (*errp >= 0 && pmap_register)
1125 *errp = svc_register(serv, inet->sk_protocol, 1124 *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
1126 ntohs(inet_sk(inet)->sport)); 1125 ntohs(inet_sk(inet)->sport));
1127 1126
1128 if (*errp < 0) { 1127 if (*errp < 0) {
@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1143 else 1142 else
1144 svc_tcp_init(svsk, serv); 1143 svc_tcp_init(svsk, serv);
1145 1144
1146 /*
1147 * We start one listener per sv_serv. We want AF_INET
1148 * requests to be automatically shunted to our AF_INET6
1149 * listener using a mapped IPv4 address. Make sure
1150 * no-one starts an equivalent IPv4 listener, which
1151 * would steal our incoming connections.
1152 */
1153 val = 0;
1154 if (serv->sv_family == AF_INET6)
1155 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1156 (char *)&val, sizeof(val));
1157
1158 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1159 svsk, svsk->sk_sk); 1146 svsk, svsk->sk_sk);
1160 1147
@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1222 struct sockaddr_storage addr; 1209 struct sockaddr_storage addr;
1223 struct sockaddr *newsin = (struct sockaddr *)&addr; 1210 struct sockaddr *newsin = (struct sockaddr *)&addr;
1224 int newlen; 1211 int newlen;
1212 int family;
1213 int val;
1225 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 1214 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
1226 1215
1227 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1216 dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1233 "sockets supported\n"); 1222 "sockets supported\n");
1234 return ERR_PTR(-EINVAL); 1223 return ERR_PTR(-EINVAL);
1235 } 1224 }
1225
1236 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1226 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1227 switch (sin->sa_family) {
1228 case AF_INET6:
1229 family = PF_INET6;
1230 break;
1231 case AF_INET:
1232 family = PF_INET;
1233 break;
1234 default:
1235 return ERR_PTR(-EINVAL);
1236 }
1237 1237
1238 error = sock_create_kern(sin->sa_family, type, protocol, &sock); 1238 error = sock_create_kern(family, type, protocol, &sock);
1239 if (error < 0) 1239 if (error < 0)
1240 return ERR_PTR(error); 1240 return ERR_PTR(error);
1241 1241
1242 svc_reclassify_socket(sock); 1242 svc_reclassify_socket(sock);
1243 1243
1244 /*
1245 * If this is an PF_INET6 listener, we want to avoid
1246 * getting requests from IPv4 remotes. Those should
1247 * be shunted to a PF_INET listener via rpcbind.
1248 */
1249 val = 1;
1250 if (family == PF_INET6)
1251 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1252 (char *)&val, sizeof(val));
1253
1244 if (type == SOCK_STREAM) 1254 if (type == SOCK_STREAM)
1245 sock->sk->sk_reuse = 1; /* allow address reuse */ 1255 sock->sk->sk_reuse = 1; /* allow address reuse */
1246 error = kernel_bind(sock, sin, len); 1256 error = kernel_bind(sock, sin, len);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 62098d101a1f..a0bfe53f1621 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -152,6 +152,37 @@ out:
152EXPORT_SYMBOL_GPL(xprt_unregister_transport); 152EXPORT_SYMBOL_GPL(xprt_unregister_transport);
153 153
154/** 154/**
155 * xprt_load_transport - load a transport implementation
156 * @transport_name: transport to load
157 *
158 * Returns:
159 * 0: transport successfully loaded
160 * -ENOENT: transport module not available
161 */
162int xprt_load_transport(const char *transport_name)
163{
164 struct xprt_class *t;
165 char module_name[sizeof t->name + 5];
166 int result;
167
168 result = 0;
169 spin_lock(&xprt_list_lock);
170 list_for_each_entry(t, &xprt_list, list) {
171 if (strcmp(t->name, transport_name) == 0) {
172 spin_unlock(&xprt_list_lock);
173 goto out;
174 }
175 }
176 spin_unlock(&xprt_list_lock);
177 strcpy(module_name, "xprt");
178 strncat(module_name, transport_name, sizeof t->name);
179 result = request_module(module_name);
180out:
181 return result;
182}
183EXPORT_SYMBOL_GPL(xprt_load_transport);
184
185/**
155 * xprt_reserve_xprt - serialize write access to transports 186 * xprt_reserve_xprt - serialize write access to transports
156 * @task: task that is requesting access to the transport 187 * @task: task that is requesting access to the transport
157 * 188 *
@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
580 dprintk("RPC: disconnected transport %p\n", xprt); 611 dprintk("RPC: disconnected transport %p\n", xprt);
581 spin_lock_bh(&xprt->transport_lock); 612 spin_lock_bh(&xprt->transport_lock);
582 xprt_clear_connected(xprt); 613 xprt_clear_connected(xprt);
583 xprt_wake_pending_tasks(xprt, -ENOTCONN); 614 xprt_wake_pending_tasks(xprt, -EAGAIN);
584 spin_unlock_bh(&xprt->transport_lock); 615 spin_unlock_bh(&xprt->transport_lock);
585} 616}
586EXPORT_SYMBOL_GPL(xprt_disconnect_done); 617EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
598 /* Try to schedule an autoclose RPC call */ 629 /* Try to schedule an autoclose RPC call */
599 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 630 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
600 queue_work(rpciod_workqueue, &xprt->task_cleanup); 631 queue_work(rpciod_workqueue, &xprt->task_cleanup);
601 xprt_wake_pending_tasks(xprt, -ENOTCONN); 632 xprt_wake_pending_tasks(xprt, -EAGAIN);
602 spin_unlock_bh(&xprt->transport_lock); 633 spin_unlock_bh(&xprt->transport_lock);
603} 634}
604 635
@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
625 /* Try to schedule an autoclose RPC call */ 656 /* Try to schedule an autoclose RPC call */
626 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 657 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
627 queue_work(rpciod_workqueue, &xprt->task_cleanup); 658 queue_work(rpciod_workqueue, &xprt->task_cleanup);
628 xprt_wake_pending_tasks(xprt, -ENOTCONN); 659 xprt_wake_pending_tasks(xprt, -EAGAIN);
629out: 660out:
630 spin_unlock_bh(&xprt->transport_lock); 661 spin_unlock_bh(&xprt->transport_lock);
631} 662}
@@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task)
695 } 726 }
696 727
697 switch (task->tk_status) { 728 switch (task->tk_status) {
698 case -ENOTCONN: 729 case -EAGAIN:
699 dprintk("RPC: %5u xprt_connect_status: connection broken\n", 730 dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
700 task->tk_pid);
701 break; 731 break;
702 case -ETIMEDOUT: 732 case -ETIMEDOUT:
703 dprintk("RPC: %5u xprt_connect_status: connect attempt timed " 733 dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
@@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
818 err = req->rq_received; 848 err = req->rq_received;
819 goto out_unlock; 849 goto out_unlock;
820 } 850 }
821 if (!xprt->ops->reserve_xprt(task)) { 851 if (!xprt->ops->reserve_xprt(task))
822 err = -EAGAIN; 852 err = -EAGAIN;
823 goto out_unlock;
824 }
825
826 if (!xprt_connected(xprt)) {
827 err = -ENOTCONN;
828 goto out_unlock;
829 }
830out_unlock: 853out_unlock:
831 spin_unlock_bh(&xprt->transport_lock); 854 spin_unlock_bh(&xprt->transport_lock);
832 return err; 855 return err;
@@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task)
870 req->rq_connect_cookie = xprt->connect_cookie; 893 req->rq_connect_cookie = xprt->connect_cookie;
871 req->rq_xtime = jiffies; 894 req->rq_xtime = jiffies;
872 status = xprt->ops->send_request(task); 895 status = xprt->ops->send_request(task);
873 if (status == 0) { 896 if (status != 0) {
874 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 897 task->tk_status = status;
875 spin_lock_bh(&xprt->transport_lock); 898 return;
899 }
876 900
877 xprt->ops->set_retrans_timeout(task); 901 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
902 spin_lock_bh(&xprt->transport_lock);
878 903
879 xprt->stat.sends++; 904 xprt->ops->set_retrans_timeout(task);
880 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
881 xprt->stat.bklog_u += xprt->backlog.qlen;
882 905
883 /* Don't race with disconnect */ 906 xprt->stat.sends++;
884 if (!xprt_connected(xprt)) 907 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
885 task->tk_status = -ENOTCONN; 908 xprt->stat.bklog_u += xprt->backlog.qlen;
886 else if (!req->rq_received)
887 rpc_sleep_on(&xprt->pending, task, xprt_timer);
888 spin_unlock_bh(&xprt->transport_lock);
889 return;
890 }
891 909
892 /* Note: at this point, task->tk_sleeping has not yet been set, 910 /* Don't race with disconnect */
893 * hence there is no danger of the waking up task being put on 911 if (!xprt_connected(xprt))
894 * schedq, and being picked up by a parallel run of rpciod(). 912 task->tk_status = -ENOTCONN;
895 */ 913 else if (!req->rq_received)
896 task->tk_status = status; 914 rpc_sleep_on(&xprt->pending, task, xprt_timer);
897 if (status == -ECONNREFUSED) 915 spin_unlock_bh(&xprt->transport_lock);
898 rpc_sleep_on(&xprt->sending, task, NULL);
899} 916}
900 917
901static inline void do_xprt_reserve(struct rpc_task *task) 918static inline void do_xprt_reserve(struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 14106d26bb95..e5e28d1946a4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
310 __func__, pad, destp, rqst->rq_slen, curlen); 310 __func__, pad, destp, rqst->rq_slen, curlen);
311 311
312 copy_len = rqst->rq_snd_buf.page_len; 312 copy_len = rqst->rq_snd_buf.page_len;
313
314 if (rqst->rq_snd_buf.tail[0].iov_len) {
315 curlen = rqst->rq_snd_buf.tail[0].iov_len;
316 if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
317 memmove(destp + copy_len,
318 rqst->rq_snd_buf.tail[0].iov_base, curlen);
319 r_xprt->rx_stats.pullup_copy_count += curlen;
320 }
321 dprintk("RPC: %s: tail destp 0x%p len %d\n",
322 __func__, destp + copy_len, curlen);
323 rqst->rq_svec[0].iov_len += curlen;
324 }
325
313 r_xprt->rx_stats.pullup_copy_count += copy_len; 326 r_xprt->rx_stats.pullup_copy_count += copy_len;
314 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; 327 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
315 for (i = 0; copy_len && i < npages; i++) { 328 for (i = 0; copy_len && i < npages; i++) {
@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
332 destp += curlen; 345 destp += curlen;
333 copy_len -= curlen; 346 copy_len -= curlen;
334 } 347 }
335 if (rqst->rq_snd_buf.tail[0].iov_len) {
336 curlen = rqst->rq_snd_buf.tail[0].iov_len;
337 if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
338 memcpy(destp,
339 rqst->rq_snd_buf.tail[0].iov_base, curlen);
340 r_xprt->rx_stats.pullup_copy_count += curlen;
341 }
342 dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
343 __func__, destp, copy_len, curlen);
344 rqst->rq_svec[0].iov_len += curlen;
345 }
346 /* header now contains entire send message */ 348 /* header now contains entire send message */
347 return pad; 349 return pad;
348} 350}
@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
656 if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) 658 if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
657 curlen = rqst->rq_rcv_buf.tail[0].iov_len; 659 curlen = rqst->rq_rcv_buf.tail[0].iov_len;
658 if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) 660 if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
659 memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); 661 memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
660 dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", 662 dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
661 __func__, srcp, copy_len, curlen); 663 __func__, srcp, copy_len, curlen);
662 rqst->rq_rcv_buf.tail[0].iov_len = curlen; 664 rqst->rq_rcv_buf.tail[0].iov_len = curlen;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a3334e3b73cc..6c26a675435a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
191 struct xdr_buf *xdr, 191 struct xdr_buf *xdr,
192 struct svc_rdma_req_map *vec) 192 struct svc_rdma_req_map *vec)
193{ 193{
194 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
195 int sge_no; 194 int sge_no;
196 u32 sge_bytes; 195 u32 sge_bytes;
197 u32 page_bytes; 196 u32 page_bytes;
@@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
235 sge_no++; 234 sge_no++;
236 } 235 }
237 236
238 BUG_ON(sge_no > sge_max); 237 dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
238 "page_base %u page_len %u head_len %zu tail_len %zu\n",
239 sge_no, page_no, xdr->page_base, xdr->page_len,
240 xdr->head[0].iov_len, xdr->tail[0].iov_len);
241
239 vec->count = sge_no; 242 vec->count = sge_no;
240 return 0; 243 return 0;
241} 244}
@@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
579 ctxt->sge[page_no+1].length = 0; 582 ctxt->sge[page_no+1].length = 0;
580 } 583 }
581 BUG_ON(sge_no > rdma->sc_max_sge); 584 BUG_ON(sge_no > rdma->sc_max_sge);
582 BUG_ON(sge_no > ctxt->count);
583 memset(&send_wr, 0, sizeof send_wr); 585 memset(&send_wr, 0, sizeof send_wr);
584 ctxt->wr_op = IB_WR_SEND; 586 ctxt->wr_op = IB_WR_SEND;
585 send_wr.wr_id = (unsigned long)ctxt; 587 send_wr.wr_id = (unsigned long)ctxt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 568330eebbfe..d40ff50887aa 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
51 51
52#define XS_TCP_LINGER_TO (15U * HZ)
53static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
54
52/* 55/*
53 * We can register our own files under /proc/sys/sunrpc by 56 * We can register our own files under /proc/sys/sunrpc by
54 * calling register_sysctl_table() again. The files in that 57 * calling register_sysctl_table() again. The files in that
@@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = {
117 .extra2 = &xprt_max_resvport_limit 120 .extra2 = &xprt_max_resvport_limit
118 }, 121 },
119 { 122 {
123 .procname = "tcp_fin_timeout",
124 .data = &xs_tcp_fin_timeout,
125 .maxlen = sizeof(xs_tcp_fin_timeout),
126 .mode = 0644,
127 .proc_handler = &proc_dointvec_jiffies,
128 .strategy = sysctl_jiffies
129 },
130 {
120 .ctl_name = 0, 131 .ctl_name = 0,
121 }, 132 },
122}; 133};
@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
521 * @task: task to put to sleep 532 * @task: task to put to sleep
522 * 533 *
523 */ 534 */
524static void xs_nospace(struct rpc_task *task) 535static int xs_nospace(struct rpc_task *task)
525{ 536{
526 struct rpc_rqst *req = task->tk_rqstp; 537 struct rpc_rqst *req = task->tk_rqstp;
527 struct rpc_xprt *xprt = req->rq_xprt; 538 struct rpc_xprt *xprt = req->rq_xprt;
528 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 539 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
540 int ret = 0;
529 541
530 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", 542 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
531 task->tk_pid, req->rq_slen - req->rq_bytes_sent, 543 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
537 /* Don't race with disconnect */ 549 /* Don't race with disconnect */
538 if (xprt_connected(xprt)) { 550 if (xprt_connected(xprt)) {
539 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { 551 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
552 ret = -EAGAIN;
540 /* 553 /*
541 * Notify TCP that we're limited by the application 554 * Notify TCP that we're limited by the application
542 * window size 555 * window size
@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
548 } 561 }
549 } else { 562 } else {
550 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 563 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
551 task->tk_status = -ENOTCONN; 564 ret = -ENOTCONN;
552 } 565 }
553 566
554 spin_unlock_bh(&xprt->transport_lock); 567 spin_unlock_bh(&xprt->transport_lock);
568 return ret;
555} 569}
556 570
557/** 571/**
@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
594 /* Still some bytes left; set up for a retry later. */ 608 /* Still some bytes left; set up for a retry later. */
595 status = -EAGAIN; 609 status = -EAGAIN;
596 } 610 }
611 if (!transport->sock)
612 goto out;
597 613
598 switch (status) { 614 switch (status) {
599 case -ENOTSOCK: 615 case -ENOTSOCK:
@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
601 /* Should we call xs_close() here? */ 617 /* Should we call xs_close() here? */
602 break; 618 break;
603 case -EAGAIN: 619 case -EAGAIN:
604 xs_nospace(task); 620 status = xs_nospace(task);
605 break; 621 break;
622 default:
623 dprintk("RPC: sendmsg returned unrecognized error %d\n",
624 -status);
606 case -ENETUNREACH: 625 case -ENETUNREACH:
607 case -EPIPE: 626 case -EPIPE:
608 case -ECONNREFUSED: 627 case -ECONNREFUSED:
609 /* When the server has died, an ICMP port unreachable message 628 /* When the server has died, an ICMP port unreachable message
610 * prompts ECONNREFUSED. */ 629 * prompts ECONNREFUSED. */
611 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 630 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
612 break;
613 default:
614 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
615 dprintk("RPC: sendmsg returned unrecognized error %d\n",
616 -status);
617 } 631 }
618 632out:
619 return status; 633 return status;
620} 634}
621 635
@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
697 status = -EAGAIN; 711 status = -EAGAIN;
698 break; 712 break;
699 } 713 }
714 if (!transport->sock)
715 goto out;
700 716
701 switch (status) { 717 switch (status) {
702 case -ENOTSOCK: 718 case -ENOTSOCK:
@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
704 /* Should we call xs_close() here? */ 720 /* Should we call xs_close() here? */
705 break; 721 break;
706 case -EAGAIN: 722 case -EAGAIN:
707 xs_nospace(task); 723 status = xs_nospace(task);
708 break; 724 break;
725 default:
726 dprintk("RPC: sendmsg returned unrecognized error %d\n",
727 -status);
709 case -ECONNRESET: 728 case -ECONNRESET:
729 case -EPIPE:
710 xs_tcp_shutdown(xprt); 730 xs_tcp_shutdown(xprt);
711 case -ECONNREFUSED: 731 case -ECONNREFUSED:
712 case -ENOTCONN: 732 case -ENOTCONN:
713 case -EPIPE:
714 status = -ENOTCONN;
715 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
716 break;
717 default:
718 dprintk("RPC: sendmsg returned unrecognized error %d\n",
719 -status);
720 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 733 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
721 xs_tcp_shutdown(xprt);
722 } 734 }
723 735out:
724 return status; 736 return status;
725} 737}
726 738
@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
767 sk->sk_error_report = transport->old_error_report; 779 sk->sk_error_report = transport->old_error_report;
768} 780}
769 781
770/** 782static void xs_reset_transport(struct sock_xprt *transport)
771 * xs_close - close a socket
772 * @xprt: transport
773 *
774 * This is used when all requests are complete; ie, no DRC state remains
775 * on the server we want to save.
776 */
777static void xs_close(struct rpc_xprt *xprt)
778{ 783{
779 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
780 struct socket *sock = transport->sock; 784 struct socket *sock = transport->sock;
781 struct sock *sk = transport->inet; 785 struct sock *sk = transport->inet;
782 786
783 if (!sk) 787 if (sk == NULL)
784 goto clear_close_wait; 788 return;
785
786 dprintk("RPC: xs_close xprt %p\n", xprt);
787 789
788 write_lock_bh(&sk->sk_callback_lock); 790 write_lock_bh(&sk->sk_callback_lock);
789 transport->inet = NULL; 791 transport->inet = NULL;
@@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt)
797 sk->sk_no_check = 0; 799 sk->sk_no_check = 0;
798 800
799 sock_release(sock); 801 sock_release(sock);
800clear_close_wait: 802}
803
804/**
805 * xs_close - close a socket
806 * @xprt: transport
807 *
808 * This is used when all requests are complete; ie, no DRC state remains
809 * on the server we want to save.
810 */
811static void xs_close(struct rpc_xprt *xprt)
812{
813 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
814
815 dprintk("RPC: xs_close xprt %p\n", xprt);
816
817 xs_reset_transport(transport);
818
801 smp_mb__before_clear_bit(); 819 smp_mb__before_clear_bit();
820 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
802 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 821 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
803 clear_bit(XPRT_CLOSING, &xprt->state); 822 clear_bit(XPRT_CLOSING, &xprt->state);
804 smp_mb__after_clear_bit(); 823 smp_mb__after_clear_bit();
@@ -1126,6 +1145,47 @@ out:
1126 read_unlock(&sk->sk_callback_lock); 1145 read_unlock(&sk->sk_callback_lock);
1127} 1146}
1128 1147
1148/*
1149 * Do the equivalent of linger/linger2 handling for dealing with
1150 * broken servers that don't close the socket in a timely
1151 * fashion
1152 */
1153static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
1154 unsigned long timeout)
1155{
1156 struct sock_xprt *transport;
1157
1158 if (xprt_test_and_set_connecting(xprt))
1159 return;
1160 set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1161 transport = container_of(xprt, struct sock_xprt, xprt);
1162 queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
1163 timeout);
1164}
1165
1166static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
1167{
1168 struct sock_xprt *transport;
1169
1170 transport = container_of(xprt, struct sock_xprt, xprt);
1171
1172 if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
1173 !cancel_delayed_work(&transport->connect_worker))
1174 return;
1175 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1176 xprt_clear_connecting(xprt);
1177}
1178
1179static void xs_sock_mark_closed(struct rpc_xprt *xprt)
1180{
1181 smp_mb__before_clear_bit();
1182 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1183 clear_bit(XPRT_CLOSING, &xprt->state);
1184 smp_mb__after_clear_bit();
1185 /* Mark transport as closed and wake up all pending tasks */
1186 xprt_disconnect_done(xprt);
1187}
1188
1129/** 1189/**
1130 * xs_tcp_state_change - callback to handle TCP socket state changes 1190 * xs_tcp_state_change - callback to handle TCP socket state changes
1131 * @sk: socket whose state has changed 1191 * @sk: socket whose state has changed
@@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk)
1158 transport->tcp_flags = 1218 transport->tcp_flags =
1159 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; 1219 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
1160 1220
1161 xprt_wake_pending_tasks(xprt, 0); 1221 xprt_wake_pending_tasks(xprt, -EAGAIN);
1162 } 1222 }
1163 spin_unlock_bh(&xprt->transport_lock); 1223 spin_unlock_bh(&xprt->transport_lock);
1164 break; 1224 break;
@@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk)
1171 clear_bit(XPRT_CONNECTED, &xprt->state); 1231 clear_bit(XPRT_CONNECTED, &xprt->state);
1172 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1232 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1173 smp_mb__after_clear_bit(); 1233 smp_mb__after_clear_bit();
1234 xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1174 break; 1235 break;
1175 case TCP_CLOSE_WAIT: 1236 case TCP_CLOSE_WAIT:
1176 /* The server initiated a shutdown of the socket */ 1237 /* The server initiated a shutdown of the socket */
1177 set_bit(XPRT_CLOSING, &xprt->state);
1178 xprt_force_disconnect(xprt); 1238 xprt_force_disconnect(xprt);
1179 case TCP_SYN_SENT: 1239 case TCP_SYN_SENT:
1180 xprt->connect_cookie++; 1240 xprt->connect_cookie++;
@@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk)
1187 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 1247 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1188 break; 1248 break;
1189 case TCP_LAST_ACK: 1249 case TCP_LAST_ACK:
1250 set_bit(XPRT_CLOSING, &xprt->state);
1251 xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1190 smp_mb__before_clear_bit(); 1252 smp_mb__before_clear_bit();
1191 clear_bit(XPRT_CONNECTED, &xprt->state); 1253 clear_bit(XPRT_CONNECTED, &xprt->state);
1192 smp_mb__after_clear_bit(); 1254 smp_mb__after_clear_bit();
1193 break; 1255 break;
1194 case TCP_CLOSE: 1256 case TCP_CLOSE:
1195 smp_mb__before_clear_bit(); 1257 xs_tcp_cancel_linger_timeout(xprt);
1196 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1258 xs_sock_mark_closed(xprt);
1197 clear_bit(XPRT_CLOSING, &xprt->state);
1198 smp_mb__after_clear_bit();
1199 /* Mark transport as closed and wake up all pending tasks */
1200 xprt_disconnect_done(xprt);
1201 } 1259 }
1202 out: 1260 out:
1203 read_unlock(&sk->sk_callback_lock); 1261 read_unlock(&sk->sk_callback_lock);
1204} 1262}
1205 1263
1206/** 1264/**
1207 * xs_tcp_error_report - callback mainly for catching RST events 1265 * xs_error_report - callback mainly for catching socket errors
1208 * @sk: socket 1266 * @sk: socket
1209 */ 1267 */
1210static void xs_tcp_error_report(struct sock *sk) 1268static void xs_error_report(struct sock *sk)
1211{ 1269{
1212 struct rpc_xprt *xprt; 1270 struct rpc_xprt *xprt;
1213 1271
1214 read_lock(&sk->sk_callback_lock); 1272 read_lock(&sk->sk_callback_lock);
1215 if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
1216 goto out;
1217 if (!(xprt = xprt_from_sock(sk))) 1273 if (!(xprt = xprt_from_sock(sk)))
1218 goto out; 1274 goto out;
1219 dprintk("RPC: %s client %p...\n" 1275 dprintk("RPC: %s client %p...\n"
1220 "RPC: error %d\n", 1276 "RPC: error %d\n",
1221 __func__, xprt, sk->sk_err); 1277 __func__, xprt, sk->sk_err);
1222 1278 xprt_wake_pending_tasks(xprt, -EAGAIN);
1223 xprt_force_disconnect(xprt);
1224out: 1279out:
1225 read_unlock(&sk->sk_callback_lock); 1280 read_unlock(&sk->sk_callback_lock);
1226} 1281}
@@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1494 sk->sk_user_data = xprt; 1549 sk->sk_user_data = xprt;
1495 sk->sk_data_ready = xs_udp_data_ready; 1550 sk->sk_data_ready = xs_udp_data_ready;
1496 sk->sk_write_space = xs_udp_write_space; 1551 sk->sk_write_space = xs_udp_write_space;
1552 sk->sk_error_report = xs_error_report;
1497 sk->sk_no_check = UDP_CSUM_NORCV; 1553 sk->sk_no_check = UDP_CSUM_NORCV;
1498 sk->sk_allocation = GFP_ATOMIC; 1554 sk->sk_allocation = GFP_ATOMIC;
1499 1555
@@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1526 goto out; 1582 goto out;
1527 1583
1528 /* Start by resetting any existing state */ 1584 /* Start by resetting any existing state */
1529 xs_close(xprt); 1585 xs_reset_transport(transport);
1530 1586
1531 if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { 1587 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1588 if (err < 0) {
1532 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1589 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1533 goto out; 1590 goto out;
1534 } 1591 }
@@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1545 xs_udp_finish_connecting(xprt, sock); 1602 xs_udp_finish_connecting(xprt, sock);
1546 status = 0; 1603 status = 0;
1547out: 1604out:
1548 xprt_wake_pending_tasks(xprt, status);
1549 xprt_clear_connecting(xprt); 1605 xprt_clear_connecting(xprt);
1606 xprt_wake_pending_tasks(xprt, status);
1550} 1607}
1551 1608
1552/** 1609/**
@@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1567 goto out; 1624 goto out;
1568 1625
1569 /* Start by resetting any existing state */ 1626 /* Start by resetting any existing state */
1570 xs_close(xprt); 1627 xs_reset_transport(transport);
1571 1628
1572 if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { 1629 err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
1630 if (err < 0) {
1573 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1631 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1574 goto out; 1632 goto out;
1575 } 1633 }
@@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1586 xs_udp_finish_connecting(xprt, sock); 1644 xs_udp_finish_connecting(xprt, sock);
1587 status = 0; 1645 status = 0;
1588out: 1646out:
1589 xprt_wake_pending_tasks(xprt, status);
1590 xprt_clear_connecting(xprt); 1647 xprt_clear_connecting(xprt);
1648 xprt_wake_pending_tasks(xprt, status);
1591} 1649}
1592 1650
1593/* 1651/*
1594 * We need to preserve the port number so the reply cache on the server can 1652 * We need to preserve the port number so the reply cache on the server can
1595 * find our cached RPC replies when we get around to reconnecting. 1653 * find our cached RPC replies when we get around to reconnecting.
1596 */ 1654 */
1597static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) 1655static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1598{ 1656{
1599 int result; 1657 int result;
1600 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1601 struct sockaddr any; 1658 struct sockaddr any;
1602 1659
1603 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1660 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1609 memset(&any, 0, sizeof(any)); 1666 memset(&any, 0, sizeof(any));
1610 any.sa_family = AF_UNSPEC; 1667 any.sa_family = AF_UNSPEC;
1611 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1668 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1612 if (result) 1669 if (!result)
1670 xs_sock_mark_closed(xprt);
1671 else
1613 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1672 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1614 result); 1673 result);
1615} 1674}
1616 1675
1676static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1677{
1678 unsigned int state = transport->inet->sk_state;
1679
1680 if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
1681 return;
1682 if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
1683 return;
1684 xs_abort_connection(xprt, transport);
1685}
1686
1617static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1687static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1618{ 1688{
1619 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1689 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1629 sk->sk_data_ready = xs_tcp_data_ready; 1699 sk->sk_data_ready = xs_tcp_data_ready;
1630 sk->sk_state_change = xs_tcp_state_change; 1700 sk->sk_state_change = xs_tcp_state_change;
1631 sk->sk_write_space = xs_tcp_write_space; 1701 sk->sk_write_space = xs_tcp_write_space;
1632 sk->sk_error_report = xs_tcp_error_report; 1702 sk->sk_error_report = xs_error_report;
1633 sk->sk_allocation = GFP_ATOMIC; 1703 sk->sk_allocation = GFP_ATOMIC;
1634 1704
1635 /* socket options */ 1705 /* socket options */
@@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1657} 1727}
1658 1728
1659/** 1729/**
1660 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint 1730 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
1661 * @work: RPC transport to connect 1731 * @xprt: RPC transport to connect
1732 * @transport: socket transport to connect
1733 * @create_sock: function to create a socket of the correct type
1662 * 1734 *
1663 * Invoked by a work queue tasklet. 1735 * Invoked by a work queue tasklet.
1664 */ 1736 */
1665static void xs_tcp_connect_worker4(struct work_struct *work) 1737static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1738 struct sock_xprt *transport,
1739 struct socket *(*create_sock)(struct rpc_xprt *,
1740 struct sock_xprt *))
1666{ 1741{
1667 struct sock_xprt *transport =
1668 container_of(work, struct sock_xprt, connect_worker.work);
1669 struct rpc_xprt *xprt = &transport->xprt;
1670 struct socket *sock = transport->sock; 1742 struct socket *sock = transport->sock;
1671 int err, status = -EIO; 1743 int status = -EIO;
1672 1744
1673 if (xprt->shutdown) 1745 if (xprt->shutdown)
1674 goto out; 1746 goto out;
1675 1747
1676 if (!sock) { 1748 if (!sock) {
1677 /* start from scratch */ 1749 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1678 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1750 sock = create_sock(xprt, transport);
1679 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1751 if (IS_ERR(sock)) {
1752 status = PTR_ERR(sock);
1680 goto out; 1753 goto out;
1681 } 1754 }
1682 xs_reclassify_socket4(sock); 1755 } else {
1756 int abort_and_exit;
1683 1757
1684 if (xs_bind4(transport, sock) < 0) { 1758 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1685 sock_release(sock); 1759 &xprt->state);
1686 goto out;
1687 }
1688 } else
1689 /* "close" the socket, preserving the local port */ 1760 /* "close" the socket, preserving the local port */
1690 xs_tcp_reuse_connection(xprt); 1761 xs_tcp_reuse_connection(xprt, transport);
1762
1763 if (abort_and_exit)
1764 goto out_eagain;
1765 }
1691 1766
1692 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1767 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1693 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1768 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
@@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1696 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 1771 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1697 xprt, -status, xprt_connected(xprt), 1772 xprt, -status, xprt_connected(xprt),
1698 sock->sk->sk_state); 1773 sock->sk->sk_state);
1699 if (status < 0) { 1774 switch (status) {
1700 switch (status) { 1775 case -ECONNREFUSED:
1701 case -EINPROGRESS: 1776 case -ECONNRESET:
1702 case -EALREADY: 1777 case -ENETUNREACH:
1703 goto out_clear; 1778 /* retry with existing socket, after a delay */
1704 case -ECONNREFUSED: 1779 case 0:
1705 case -ECONNRESET: 1780 case -EINPROGRESS:
1706 /* retry with existing socket, after a delay */ 1781 case -EALREADY:
1707 break; 1782 xprt_clear_connecting(xprt);
1708 default: 1783 return;
1709 /* get rid of existing socket, and retry */
1710 xs_tcp_shutdown(xprt);
1711 }
1712 } 1784 }
1785 /* get rid of existing socket, and retry */
1786 xs_tcp_shutdown(xprt);
1787 printk("%s: connect returned unhandled error %d\n",
1788 __func__, status);
1789out_eagain:
1790 status = -EAGAIN;
1713out: 1791out:
1714 xprt_wake_pending_tasks(xprt, status);
1715out_clear:
1716 xprt_clear_connecting(xprt); 1792 xprt_clear_connecting(xprt);
1793 xprt_wake_pending_tasks(xprt, status);
1794}
1795
1796static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1797 struct sock_xprt *transport)
1798{
1799 struct socket *sock;
1800 int err;
1801
1802 /* start from scratch */
1803 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1804 if (err < 0) {
1805 dprintk("RPC: can't create TCP transport socket (%d).\n",
1806 -err);
1807 goto out_err;
1808 }
1809 xs_reclassify_socket4(sock);
1810
1811 if (xs_bind4(transport, sock) < 0) {
1812 sock_release(sock);
1813 goto out_err;
1814 }
1815 return sock;
1816out_err:
1817 return ERR_PTR(-EIO);
1717} 1818}
1718 1819
1719/** 1820/**
1720 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint 1821 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1721 * @work: RPC transport to connect 1822 * @work: RPC transport to connect
1722 * 1823 *
1723 * Invoked by a work queue tasklet. 1824 * Invoked by a work queue tasklet.
1724 */ 1825 */
1725static void xs_tcp_connect_worker6(struct work_struct *work) 1826static void xs_tcp_connect_worker4(struct work_struct *work)
1726{ 1827{
1727 struct sock_xprt *transport = 1828 struct sock_xprt *transport =
1728 container_of(work, struct sock_xprt, connect_worker.work); 1829 container_of(work, struct sock_xprt, connect_worker.work);
1729 struct rpc_xprt *xprt = &transport->xprt; 1830 struct rpc_xprt *xprt = &transport->xprt;
1730 struct socket *sock = transport->sock;
1731 int err, status = -EIO;
1732 1831
1733 if (xprt->shutdown) 1832 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1734 goto out; 1833}
1735 1834
1736 if (!sock) { 1835static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1737 /* start from scratch */ 1836 struct sock_xprt *transport)
1738 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1837{
1739 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1838 struct socket *sock;
1740 goto out; 1839 int err;
1741 } 1840
1742 xs_reclassify_socket6(sock); 1841 /* start from scratch */
1842 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1843 if (err < 0) {
1844 dprintk("RPC: can't create TCP transport socket (%d).\n",
1845 -err);
1846 goto out_err;
1847 }
1848 xs_reclassify_socket6(sock);
1743 1849
1744 if (xs_bind6(transport, sock) < 0) { 1850 if (xs_bind6(transport, sock) < 0) {
1745 sock_release(sock); 1851 sock_release(sock);
1746 goto out; 1852 goto out_err;
1747 } 1853 }
1748 } else 1854 return sock;
1749 /* "close" the socket, preserving the local port */ 1855out_err:
1750 xs_tcp_reuse_connection(xprt); 1856 return ERR_PTR(-EIO);
1857}
1751 1858
1752 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1859/**
1753 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1860 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1861 * @work: RPC transport to connect
1862 *
1863 * Invoked by a work queue tasklet.
1864 */
1865static void xs_tcp_connect_worker6(struct work_struct *work)
1866{
1867 struct sock_xprt *transport =
1868 container_of(work, struct sock_xprt, connect_worker.work);
1869 struct rpc_xprt *xprt = &transport->xprt;
1754 1870
1755 status = xs_tcp_finish_connecting(xprt, sock); 1871 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
1756 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1757 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
1758 if (status < 0) {
1759 switch (status) {
1760 case -EINPROGRESS:
1761 case -EALREADY:
1762 goto out_clear;
1763 case -ECONNREFUSED:
1764 case -ECONNRESET:
1765 /* retry with existing socket, after a delay */
1766 break;
1767 default:
1768 /* get rid of existing socket, and retry */
1769 xs_tcp_shutdown(xprt);
1770 }
1771 }
1772out:
1773 xprt_wake_pending_tasks(xprt, status);
1774out_clear:
1775 xprt_clear_connecting(xprt);
1776} 1872}
1777 1873
1778/** 1874/**
@@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task)
1817{ 1913{
1818 struct rpc_xprt *xprt = task->tk_xprt; 1914 struct rpc_xprt *xprt = task->tk_xprt;
1819 1915
1820 /* Initiate graceful shutdown of the socket if not already done */
1821 if (test_bit(XPRT_CONNECTED, &xprt->state))
1822 xs_tcp_shutdown(xprt);
1823 /* Exit if we need to wait for socket shutdown to complete */ 1916 /* Exit if we need to wait for socket shutdown to complete */
1824 if (test_bit(XPRT_CLOSING, &xprt->state)) 1917 if (test_bit(XPRT_CLOSING, &xprt->state))
1825 return; 1918 return;