aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-13 20:13:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-13 20:13:19 -0400
commit06b8ab55289345ab191bf4bf0e4acc6d4bdf293d (patch)
tree9af9215097e26c026f30a58c6ca3092ec15d1e1e /net
parentdc1cc85133120e49c223f36aa77d398b8abac727 (diff)
parent71a6ec8ac587418ceb6b420def1ca44b334c1ff7 (diff)
Merge tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: - stable fix for a bug in nfs3_list_one_acl() - speed up NFS path walks by supporting LOOKUP_RCU - more read/write code cleanups - pNFS fixes for layout return on close - fixes for the RCU handling in the rpcsec_gss code - more NFS/RDMA fixes" * tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits) nfs: reject changes to resvport and sharecache during remount NFS: Avoid infinite loop when RELEASE_LOCKOWNER getting expired error SUNRPC: remove all refcounting of groupinfo from rpcauth_lookupcred NFS: fix two problems in lookup_revalidate in RCU-walk NFS: allow lockless access to access_cache NFS: teach nfs_lookup_verify_inode to handle LOOKUP_RCU NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU NFS: support RCU_WALK in nfs_permission() sunrpc/auth: allow lockless (rcu) lookup of credential cache. NFS: prepare for RCU-walk support but pushing tests later in code. NFS: nfs4_lookup_revalidate: only evaluate parent if it will be used. NFS: add checks for returned value of try_module_get() nfs: clear_request_commit while holding i_lock pnfs: add pnfs_put_lseg_async pnfs: find swapped pages on pnfs commit lists too nfs: fix comment and add warn_on for PG_INODE_REF nfs: check wait_on_bit_lock err in page_group_lock sunrpc: remove "ec" argument from encrypt_v2 operation sunrpc: clean up sparse endianness warnings in gss_krb5_wrap.c sunrpc: clean up sparse endianness warnings in gss_krb5_seal.c ...
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/addr.c16
-rw-r--r--net/sunrpc/auth.c68
-rw-r--r--net/sunrpc/auth_generic.c6
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c126
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c9
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c28
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c20
-rw-r--r--net/sunrpc/auth_null.c2
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c83
-rw-r--r--net/sunrpc/xprtrdma/transport.c17
-rw-r--r--net/sunrpc/xprtrdma/verbs.c739
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h61
-rw-r--r--net/sunrpc/xprtsock.c9
16 files changed, 809 insertions, 383 deletions
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index a622ad64acd8..2e0a6f92e563 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -176,7 +176,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
176 len = (buf + buflen) - delim - 1; 176 len = (buf + buflen) - delim - 1;
177 p = kstrndup(delim + 1, len, GFP_KERNEL); 177 p = kstrndup(delim + 1, len, GFP_KERNEL);
178 if (p) { 178 if (p) {
179 unsigned long scope_id = 0; 179 u32 scope_id = 0;
180 struct net_device *dev; 180 struct net_device *dev;
181 181
182 dev = dev_get_by_name(net, p); 182 dev = dev_get_by_name(net, p);
@@ -184,7 +184,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
184 scope_id = dev->ifindex; 184 scope_id = dev->ifindex;
185 dev_put(dev); 185 dev_put(dev);
186 } else { 186 } else {
187 if (strict_strtoul(p, 10, &scope_id) == 0) { 187 if (kstrtou32(p, 10, &scope_id) == 0) {
188 kfree(p); 188 kfree(p);
189 return 0; 189 return 0;
190 } 190 }
@@ -304,7 +304,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
304 * @sap: buffer into which to plant socket address 304 * @sap: buffer into which to plant socket address
305 * @salen: size of buffer 305 * @salen: size of buffer
306 * 306 *
307 * @uaddr does not have to be '\0'-terminated, but strict_strtoul() and 307 * @uaddr does not have to be '\0'-terminated, but kstrtou8() and
308 * rpc_pton() require proper string termination to be successful. 308 * rpc_pton() require proper string termination to be successful.
309 * 309 *
310 * Returns the size of the socket address if successful; otherwise 310 * Returns the size of the socket address if successful; otherwise
@@ -315,7 +315,7 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
315 const size_t salen) 315 const size_t salen)
316{ 316{
317 char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; 317 char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')];
318 unsigned long portlo, porthi; 318 u8 portlo, porthi;
319 unsigned short port; 319 unsigned short port;
320 320
321 if (uaddr_len > RPCBIND_MAXUADDRLEN) 321 if (uaddr_len > RPCBIND_MAXUADDRLEN)
@@ -327,18 +327,14 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
327 c = strrchr(buf, '.'); 327 c = strrchr(buf, '.');
328 if (unlikely(c == NULL)) 328 if (unlikely(c == NULL))
329 return 0; 329 return 0;
330 if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) 330 if (unlikely(kstrtou8(c + 1, 10, &portlo) != 0))
331 return 0;
332 if (unlikely(portlo > 255))
333 return 0; 331 return 0;
334 332
335 *c = '\0'; 333 *c = '\0';
336 c = strrchr(buf, '.'); 334 c = strrchr(buf, '.');
337 if (unlikely(c == NULL)) 335 if (unlikely(c == NULL))
338 return 0; 336 return 0;
339 if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) 337 if (unlikely(kstrtou8(c + 1, 10, &porthi) != 0))
340 return 0;
341 if (unlikely(porthi > 255))
342 return 0; 338 return 0;
343 339
344 port = (unsigned short)((porthi << 8) | portlo); 340 port = (unsigned short)((porthi << 8) | portlo);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f77366717420..383eb919ac0b 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -48,7 +48,7 @@ static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
48 48
49 if (!val) 49 if (!val)
50 goto out_inval; 50 goto out_inval;
51 ret = strict_strtoul(val, 0, &num); 51 ret = kstrtoul(val, 0, &num);
52 if (ret == -EINVAL) 52 if (ret == -EINVAL)
53 goto out_inval; 53 goto out_inval;
54 nbits = fls(num); 54 nbits = fls(num);
@@ -80,6 +80,10 @@ static struct kernel_param_ops param_ops_hashtbl_sz = {
80module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644); 80module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
81MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); 81MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
82 82
83static unsigned long auth_max_cred_cachesize = ULONG_MAX;
84module_param(auth_max_cred_cachesize, ulong, 0644);
85MODULE_PARM_DESC(auth_max_cred_cachesize, "RPC credential maximum total cache size");
86
83static u32 87static u32
84pseudoflavor_to_flavor(u32 flavor) { 88pseudoflavor_to_flavor(u32 flavor) {
85 if (flavor > RPC_AUTH_MAXFLAVOR) 89 if (flavor > RPC_AUTH_MAXFLAVOR)
@@ -363,6 +367,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred)
363} 367}
364EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); 368EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
365 369
370char *
371rpcauth_stringify_acceptor(struct rpc_cred *cred)
372{
373 if (!cred->cr_ops->crstringify_acceptor)
374 return NULL;
375 return cred->cr_ops->crstringify_acceptor(cred);
376}
377EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor);
378
366/* 379/*
367 * Destroy a list of credentials 380 * Destroy a list of credentials
368 */ 381 */
@@ -472,6 +485,20 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
472 return freed; 485 return freed;
473} 486}
474 487
488static unsigned long
489rpcauth_cache_do_shrink(int nr_to_scan)
490{
491 LIST_HEAD(free);
492 unsigned long freed;
493
494 spin_lock(&rpc_credcache_lock);
495 freed = rpcauth_prune_expired(&free, nr_to_scan);
496 spin_unlock(&rpc_credcache_lock);
497 rpcauth_destroy_credlist(&free);
498
499 return freed;
500}
501
475/* 502/*
476 * Run memory cache shrinker. 503 * Run memory cache shrinker.
477 */ 504 */
@@ -479,9 +506,6 @@ static unsigned long
479rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) 506rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
480 507
481{ 508{
482 LIST_HEAD(free);
483 unsigned long freed;
484
485 if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) 509 if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
486 return SHRINK_STOP; 510 return SHRINK_STOP;
487 511
@@ -489,12 +513,7 @@ rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
489 if (list_empty(&cred_unused)) 513 if (list_empty(&cred_unused))
490 return SHRINK_STOP; 514 return SHRINK_STOP;
491 515
492 spin_lock(&rpc_credcache_lock); 516 return rpcauth_cache_do_shrink(sc->nr_to_scan);
493 freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
494 spin_unlock(&rpc_credcache_lock);
495 rpcauth_destroy_credlist(&free);
496
497 return freed;
498} 517}
499 518
500static unsigned long 519static unsigned long
@@ -504,6 +523,21 @@ rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
504 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; 523 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
505} 524}
506 525
526static void
527rpcauth_cache_enforce_limit(void)
528{
529 unsigned long diff;
530 unsigned int nr_to_scan;
531
532 if (number_cred_unused <= auth_max_cred_cachesize)
533 return;
534 diff = number_cred_unused - auth_max_cred_cachesize;
535 nr_to_scan = 100;
536 if (diff < nr_to_scan)
537 nr_to_scan = diff;
538 rpcauth_cache_do_shrink(nr_to_scan);
539}
540
507/* 541/*
508 * Look up a process' credentials in the authentication cache 542 * Look up a process' credentials in the authentication cache
509 */ 543 */
@@ -523,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
523 hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { 557 hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
524 if (!entry->cr_ops->crmatch(acred, entry, flags)) 558 if (!entry->cr_ops->crmatch(acred, entry, flags))
525 continue; 559 continue;
560 if (flags & RPCAUTH_LOOKUP_RCU) {
561 if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
562 !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
563 cred = entry;
564 break;
565 }
526 spin_lock(&cache->lock); 566 spin_lock(&cache->lock);
527 if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { 567 if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
528 spin_unlock(&cache->lock); 568 spin_unlock(&cache->lock);
@@ -537,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
537 if (cred != NULL) 577 if (cred != NULL)
538 goto found; 578 goto found;
539 579
580 if (flags & RPCAUTH_LOOKUP_RCU)
581 return ERR_PTR(-ECHILD);
582
540 new = auth->au_ops->crcreate(auth, acred, flags); 583 new = auth->au_ops->crcreate(auth, acred, flags);
541 if (IS_ERR(new)) { 584 if (IS_ERR(new)) {
542 cred = new; 585 cred = new;
@@ -557,6 +600,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
557 } else 600 } else
558 list_add_tail(&new->cr_lru, &free); 601 list_add_tail(&new->cr_lru, &free);
559 spin_unlock(&cache->lock); 602 spin_unlock(&cache->lock);
603 rpcauth_cache_enforce_limit();
560found: 604found:
561 if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && 605 if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
562 cred->cr_ops->cr_init != NULL && 606 cred->cr_ops->cr_init != NULL &&
@@ -586,10 +630,8 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
586 memset(&acred, 0, sizeof(acred)); 630 memset(&acred, 0, sizeof(acred));
587 acred.uid = cred->fsuid; 631 acred.uid = cred->fsuid;
588 acred.gid = cred->fsgid; 632 acred.gid = cred->fsgid;
589 acred.group_info = get_group_info(((struct cred *)cred)->group_info); 633 acred.group_info = cred->group_info;
590
591 ret = auth->au_ops->lookup_cred(auth, &acred, flags); 634 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
592 put_group_info(acred.group_info);
593 return ret; 635 return ret;
594} 636}
595EXPORT_SYMBOL_GPL(rpcauth_lookupcred); 637EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index ed04869b2d4f..6f6b829c9e8e 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void)
38} 38}
39EXPORT_SYMBOL_GPL(rpc_lookup_cred); 39EXPORT_SYMBOL_GPL(rpc_lookup_cred);
40 40
41struct rpc_cred *rpc_lookup_cred_nonblock(void)
42{
43 return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
44}
45EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
46
41/* 47/*
42 * Public call interface for looking up machine creds. 48 * Public call interface for looking up machine creds.
43 */ 49 */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index b6e440baccc3..afb292cd797d 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -183,8 +183,9 @@ gss_cred_get_ctx(struct rpc_cred *cred)
183 struct gss_cl_ctx *ctx = NULL; 183 struct gss_cl_ctx *ctx = NULL;
184 184
185 rcu_read_lock(); 185 rcu_read_lock();
186 if (gss_cred->gc_ctx) 186 ctx = rcu_dereference(gss_cred->gc_ctx);
187 ctx = gss_get_ctx(gss_cred->gc_ctx); 187 if (ctx)
188 gss_get_ctx(ctx);
188 rcu_read_unlock(); 189 rcu_read_unlock();
189 return ctx; 190 return ctx;
190} 191}
@@ -262,9 +263,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
262 p = ERR_PTR(ret); 263 p = ERR_PTR(ret);
263 goto err; 264 goto err;
264 } 265 }
265 dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n", 266
266 __func__, ctx->gc_expiry, now, timeout); 267 /* is there any trailing data? */
267 return q; 268 if (q == end) {
269 p = q;
270 goto done;
271 }
272
273 /* pull in acceptor name (if there is one) */
274 p = simple_get_netobj(q, end, &ctx->gc_acceptor);
275 if (IS_ERR(p))
276 goto err;
277done:
278 dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
279 __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
280 ctx->gc_acceptor.data);
281 return p;
268err: 282err:
269 dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); 283 dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
270 return p; 284 return p;
@@ -1194,13 +1208,13 @@ gss_destroying_context(struct rpc_cred *cred)
1194{ 1208{
1195 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); 1209 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
1196 struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); 1210 struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
1211 struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
1197 struct rpc_task *task; 1212 struct rpc_task *task;
1198 1213
1199 if (gss_cred->gc_ctx == NULL || 1214 if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
1200 test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
1201 return 0; 1215 return 0;
1202 1216
1203 gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; 1217 ctx->gc_proc = RPC_GSS_PROC_DESTROY;
1204 cred->cr_ops = &gss_nullops; 1218 cred->cr_ops = &gss_nullops;
1205 1219
1206 /* Take a reference to ensure the cred will be destroyed either 1220 /* Take a reference to ensure the cred will be destroyed either
@@ -1225,6 +1239,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
1225 1239
1226 gss_delete_sec_context(&ctx->gc_gss_ctx); 1240 gss_delete_sec_context(&ctx->gc_gss_ctx);
1227 kfree(ctx->gc_wire_ctx.data); 1241 kfree(ctx->gc_wire_ctx.data);
1242 kfree(ctx->gc_acceptor.data);
1228 kfree(ctx); 1243 kfree(ctx);
1229} 1244}
1230 1245
@@ -1260,7 +1275,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
1260{ 1275{
1261 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); 1276 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
1262 struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); 1277 struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
1263 struct gss_cl_ctx *ctx = gss_cred->gc_ctx; 1278 struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
1264 1279
1265 RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); 1280 RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
1266 call_rcu(&cred->cr_rcu, gss_free_cred_callback); 1281 call_rcu(&cred->cr_rcu, gss_free_cred_callback);
@@ -1332,6 +1347,36 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
1332 return err; 1347 return err;
1333} 1348}
1334 1349
1350static char *
1351gss_stringify_acceptor(struct rpc_cred *cred)
1352{
1353 char *string = NULL;
1354 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
1355 struct gss_cl_ctx *ctx;
1356 struct xdr_netobj *acceptor;
1357
1358 rcu_read_lock();
1359 ctx = rcu_dereference(gss_cred->gc_ctx);
1360 if (!ctx)
1361 goto out;
1362
1363 acceptor = &ctx->gc_acceptor;
1364
1365 /* no point if there's no string */
1366 if (!acceptor->len)
1367 goto out;
1368
1369 string = kmalloc(acceptor->len + 1, GFP_KERNEL);
1370 if (!string)
1371 goto out;
1372
1373 memcpy(string, acceptor->data, acceptor->len);
1374 string[acceptor->len] = '\0';
1375out:
1376 rcu_read_unlock();
1377 return string;
1378}
1379
1335/* 1380/*
1336 * Returns -EACCES if GSS context is NULL or will expire within the 1381 * Returns -EACCES if GSS context is NULL or will expire within the
1337 * timeout (miliseconds) 1382 * timeout (miliseconds)
@@ -1340,15 +1385,16 @@ static int
1340gss_key_timeout(struct rpc_cred *rc) 1385gss_key_timeout(struct rpc_cred *rc)
1341{ 1386{
1342 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); 1387 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1388 struct gss_cl_ctx *ctx;
1343 unsigned long now = jiffies; 1389 unsigned long now = jiffies;
1344 unsigned long expire; 1390 unsigned long expire;
1345 1391
1346 if (gss_cred->gc_ctx == NULL) 1392 rcu_read_lock();
1347 return -EACCES; 1393 ctx = rcu_dereference(gss_cred->gc_ctx);
1348 1394 if (ctx)
1349 expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ); 1395 expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ);
1350 1396 rcu_read_unlock();
1351 if (time_after(now, expire)) 1397 if (!ctx || time_after(now, expire))
1352 return -EACCES; 1398 return -EACCES;
1353 return 0; 1399 return 0;
1354} 1400}
@@ -1357,13 +1403,19 @@ static int
1357gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) 1403gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1358{ 1404{
1359 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); 1405 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1406 struct gss_cl_ctx *ctx;
1360 int ret; 1407 int ret;
1361 1408
1362 if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) 1409 if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
1363 goto out; 1410 goto out;
1364 /* Don't match with creds that have expired. */ 1411 /* Don't match with creds that have expired. */
1365 if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) 1412 rcu_read_lock();
1413 ctx = rcu_dereference(gss_cred->gc_ctx);
1414 if (!ctx || time_after(jiffies, ctx->gc_expiry)) {
1415 rcu_read_unlock();
1366 return 0; 1416 return 0;
1417 }
1418 rcu_read_unlock();
1367 if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) 1419 if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
1368 return 0; 1420 return 0;
1369out: 1421out:
@@ -1909,29 +1961,31 @@ static const struct rpc_authops authgss_ops = {
1909}; 1961};
1910 1962
1911static const struct rpc_credops gss_credops = { 1963static const struct rpc_credops gss_credops = {
1912 .cr_name = "AUTH_GSS", 1964 .cr_name = "AUTH_GSS",
1913 .crdestroy = gss_destroy_cred, 1965 .crdestroy = gss_destroy_cred,
1914 .cr_init = gss_cred_init, 1966 .cr_init = gss_cred_init,
1915 .crbind = rpcauth_generic_bind_cred, 1967 .crbind = rpcauth_generic_bind_cred,
1916 .crmatch = gss_match, 1968 .crmatch = gss_match,
1917 .crmarshal = gss_marshal, 1969 .crmarshal = gss_marshal,
1918 .crrefresh = gss_refresh, 1970 .crrefresh = gss_refresh,
1919 .crvalidate = gss_validate, 1971 .crvalidate = gss_validate,
1920 .crwrap_req = gss_wrap_req, 1972 .crwrap_req = gss_wrap_req,
1921 .crunwrap_resp = gss_unwrap_resp, 1973 .crunwrap_resp = gss_unwrap_resp,
1922 .crkey_timeout = gss_key_timeout, 1974 .crkey_timeout = gss_key_timeout,
1975 .crstringify_acceptor = gss_stringify_acceptor,
1923}; 1976};
1924 1977
1925static const struct rpc_credops gss_nullops = { 1978static const struct rpc_credops gss_nullops = {
1926 .cr_name = "AUTH_GSS", 1979 .cr_name = "AUTH_GSS",
1927 .crdestroy = gss_destroy_nullcred, 1980 .crdestroy = gss_destroy_nullcred,
1928 .crbind = rpcauth_generic_bind_cred, 1981 .crbind = rpcauth_generic_bind_cred,
1929 .crmatch = gss_match, 1982 .crmatch = gss_match,
1930 .crmarshal = gss_marshal, 1983 .crmarshal = gss_marshal,
1931 .crrefresh = gss_refresh_null, 1984 .crrefresh = gss_refresh_null,
1932 .crvalidate = gss_validate, 1985 .crvalidate = gss_validate,
1933 .crwrap_req = gss_wrap_req, 1986 .crwrap_req = gss_wrap_req,
1934 .crunwrap_resp = gss_unwrap_resp, 1987 .crunwrap_resp = gss_unwrap_resp,
1988 .crstringify_acceptor = gss_stringify_acceptor,
1935}; 1989};
1936 1990
1937static const struct rpc_pipe_ops gss_upcall_ops_v0 = { 1991static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0f43e894bc0a..f5ed9f6ece06 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -641,7 +641,7 @@ out:
641 641
642u32 642u32
643gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, 643gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
644 struct xdr_buf *buf, int ec, struct page **pages) 644 struct xdr_buf *buf, struct page **pages)
645{ 645{
646 u32 err; 646 u32 err;
647 struct xdr_netobj hmac; 647 struct xdr_netobj hmac;
@@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
684 ecptr = buf->tail[0].iov_base; 684 ecptr = buf->tail[0].iov_base;
685 } 685 }
686 686
687 memset(ecptr, 'X', ec);
688 buf->tail[0].iov_len += ec;
689 buf->len += ec;
690
691 /* copy plaintext gss token header after filler (if any) */ 687 /* copy plaintext gss token header after filler (if any) */
692 memcpy(ecptr + ec, buf->head[0].iov_base + offset, 688 memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
693 GSS_KRB5_TOK_HDR_LEN);
694 buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; 689 buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
695 buf->len += GSS_KRB5_TOK_HDR_LEN; 690 buf->len += GSS_KRB5_TOK_HDR_LEN;
696 691
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 62ae3273186c..42768e5c3994 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,31 +70,37 @@
70 70
71DEFINE_SPINLOCK(krb5_seq_lock); 71DEFINE_SPINLOCK(krb5_seq_lock);
72 72
73static char * 73static void *
74setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) 74setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
75{ 75{
76 __be16 *ptr, *krb5_hdr; 76 u16 *ptr;
77 void *krb5_hdr;
77 int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; 78 int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
78 79
79 token->len = g_token_size(&ctx->mech_used, body_size); 80 token->len = g_token_size(&ctx->mech_used, body_size);
80 81
81 ptr = (__be16 *)token->data; 82 ptr = (u16 *)token->data;
82 g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr); 83 g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
83 84
84 /* ptr now at start of header described in rfc 1964, section 1.2.1: */ 85 /* ptr now at start of header described in rfc 1964, section 1.2.1: */
85 krb5_hdr = ptr; 86 krb5_hdr = ptr;
86 *ptr++ = KG_TOK_MIC_MSG; 87 *ptr++ = KG_TOK_MIC_MSG;
87 *ptr++ = cpu_to_le16(ctx->gk5e->signalg); 88 /*
89 * signalg is stored as if it were converted from LE to host endian, even
90 * though it's an opaque pair of bytes according to the RFC.
91 */
92 *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
88 *ptr++ = SEAL_ALG_NONE; 93 *ptr++ = SEAL_ALG_NONE;
89 *ptr++ = 0xffff; 94 *ptr = 0xffff;
90 95
91 return (char *)krb5_hdr; 96 return krb5_hdr;
92} 97}
93 98
94static void * 99static void *
95setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) 100setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
96{ 101{
97 __be16 *ptr, *krb5_hdr; 102 u16 *ptr;
103 void *krb5_hdr;
98 u8 *p, flags = 0x00; 104 u8 *p, flags = 0x00;
99 105
100 if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0) 106 if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
@@ -104,15 +110,15 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
104 110
105 /* Per rfc 4121, sec 4.2.6.1, there is no header, 111 /* Per rfc 4121, sec 4.2.6.1, there is no header,
106 * just start the token */ 112 * just start the token */
107 krb5_hdr = ptr = (__be16 *)token->data; 113 krb5_hdr = ptr = (u16 *)token->data;
108 114
109 *ptr++ = KG2_TOK_MIC; 115 *ptr++ = KG2_TOK_MIC;
110 p = (u8 *)ptr; 116 p = (u8 *)ptr;
111 *p++ = flags; 117 *p++ = flags;
112 *p++ = 0xff; 118 *p++ = 0xff;
113 ptr = (__be16 *)p; 119 ptr = (u16 *)p;
114 *ptr++ = 0xffff;
115 *ptr++ = 0xffff; 120 *ptr++ = 0xffff;
121 *ptr = 0xffff;
116 122
117 token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; 123 token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
118 return krb5_hdr; 124 return krb5_hdr;
@@ -181,7 +187,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
181 spin_lock(&krb5_seq_lock); 187 spin_lock(&krb5_seq_lock);
182 seq_send = ctx->seq_send64++; 188 seq_send = ctx->seq_send64++;
183 spin_unlock(&krb5_seq_lock); 189 spin_unlock(&krb5_seq_lock);
184 *((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); 190 *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
185 191
186 if (ctx->initiate) { 192 if (ctx->initiate) {
187 cksumkey = ctx->initiator_sign; 193 cksumkey = ctx->initiator_sign;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 42560e55d978..4b614c604fe0 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -201,9 +201,15 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
201 201
202 msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength; 202 msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
203 203
204 *(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); 204 /*
205 memset(ptr + 4, 0xff, 4); 205 * signalg and sealalg are stored as if they were converted from LE
206 *(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); 206 * to host endian, even though they're opaque pairs of bytes according
207 * to the RFC.
208 */
209 *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
210 *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
211 ptr[6] = 0xff;
212 ptr[7] = 0xff;
207 213
208 gss_krb5_make_confounder(msg_start, conflen); 214 gss_krb5_make_confounder(msg_start, conflen);
209 215
@@ -438,7 +444,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
438 u8 *ptr, *plainhdr; 444 u8 *ptr, *plainhdr;
439 s32 now; 445 s32 now;
440 u8 flags = 0x00; 446 u8 flags = 0x00;
441 __be16 *be16ptr, ec = 0; 447 __be16 *be16ptr;
442 __be64 *be64ptr; 448 __be64 *be64ptr;
443 u32 err; 449 u32 err;
444 450
@@ -468,16 +474,16 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
468 be16ptr = (__be16 *)ptr; 474 be16ptr = (__be16 *)ptr;
469 475
470 blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); 476 blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
471 *be16ptr++ = cpu_to_be16(ec); 477 *be16ptr++ = 0;
472 /* "inner" token header always uses 0 for RRC */ 478 /* "inner" token header always uses 0 for RRC */
473 *be16ptr++ = cpu_to_be16(0); 479 *be16ptr++ = 0;
474 480
475 be64ptr = (__be64 *)be16ptr; 481 be64ptr = (__be64 *)be16ptr;
476 spin_lock(&krb5_seq_lock); 482 spin_lock(&krb5_seq_lock);
477 *be64ptr = cpu_to_be64(kctx->seq_send64++); 483 *be64ptr = cpu_to_be64(kctx->seq_send64++);
478 spin_unlock(&krb5_seq_lock); 484 spin_unlock(&krb5_seq_lock);
479 485
480 err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages); 486 err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
481 if (err) 487 if (err)
482 return err; 488 return err;
483 489
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index f0ebe07978a2..712c123e04e9 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth)
35static struct rpc_cred * 35static struct rpc_cred *
36nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 36nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
37{ 37{
38 if (flags & RPCAUTH_LOOKUP_RCU)
39 return &null_cred;
38 return get_rpccred(&null_cred); 40 return get_rpccred(&null_cred);
39} 41}
40 42
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2e6ab10734f6..488ddeed9363 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1746,6 +1746,7 @@ call_bind_status(struct rpc_task *task)
1746 case -EHOSTDOWN: 1746 case -EHOSTDOWN:
1747 case -EHOSTUNREACH: 1747 case -EHOSTUNREACH:
1748 case -ENETUNREACH: 1748 case -ENETUNREACH:
1749 case -ENOBUFS:
1749 case -EPIPE: 1750 case -EPIPE:
1750 dprintk("RPC: %5u remote rpcbind unreachable: %d\n", 1751 dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
1751 task->tk_pid, task->tk_status); 1752 task->tk_pid, task->tk_status);
@@ -1812,6 +1813,8 @@ call_connect_status(struct rpc_task *task)
1812 case -ECONNABORTED: 1813 case -ECONNABORTED:
1813 case -ENETUNREACH: 1814 case -ENETUNREACH:
1814 case -EHOSTUNREACH: 1815 case -EHOSTUNREACH:
1816 case -ENOBUFS:
1817 case -EPIPE:
1815 if (RPC_IS_SOFTCONN(task)) 1818 if (RPC_IS_SOFTCONN(task))
1816 break; 1819 break;
1817 /* retry with existing socket, after a delay */ 1820 /* retry with existing socket, after a delay */
@@ -1918,6 +1921,7 @@ call_transmit_status(struct rpc_task *task)
1918 case -ECONNRESET: 1921 case -ECONNRESET:
1919 case -ECONNABORTED: 1922 case -ECONNABORTED:
1920 case -ENOTCONN: 1923 case -ENOTCONN:
1924 case -ENOBUFS:
1921 case -EPIPE: 1925 case -EPIPE:
1922 rpc_task_force_reencode(task); 1926 rpc_task_force_reencode(task);
1923 } 1927 }
@@ -2034,6 +2038,7 @@ call_status(struct rpc_task *task)
2034 case -ECONNRESET: 2038 case -ECONNRESET:
2035 case -ECONNABORTED: 2039 case -ECONNABORTED:
2036 rpc_force_rebind(clnt); 2040 rpc_force_rebind(clnt);
2041 case -ENOBUFS:
2037 rpc_delay(task, 3*HZ); 2042 rpc_delay(task, 3*HZ);
2038 case -EPIPE: 2043 case -EPIPE:
2039 case -ENOTCONN: 2044 case -ENOTCONN:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b18554898562..2d12b76b5a64 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -195,7 +195,7 @@ static struct inode *
195rpc_alloc_inode(struct super_block *sb) 195rpc_alloc_inode(struct super_block *sb)
196{ 196{
197 struct rpc_inode *rpci; 197 struct rpc_inode *rpci;
198 rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); 198 rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
199 if (!rpci) 199 if (!rpci)
200 return NULL; 200 return NULL;
201 return &rpci->vfs_inode; 201 return &rpci->vfs_inode;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 51c63165073c..56e4e150e80e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -744,6 +744,7 @@ static void xprt_connect_status(struct rpc_task *task)
744 case -ECONNABORTED: 744 case -ECONNABORTED:
745 case -ENETUNREACH: 745 case -ENETUNREACH:
746 case -EHOSTUNREACH: 746 case -EHOSTUNREACH:
747 case -EPIPE:
747 case -EAGAIN: 748 case -EAGAIN:
748 dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); 749 dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
749 break; 750 break;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 693966d3f33b..6166c985fe24 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,14 +53,6 @@
53# define RPCDBG_FACILITY RPCDBG_TRANS 53# define RPCDBG_FACILITY RPCDBG_TRANS
54#endif 54#endif
55 55
56enum rpcrdma_chunktype {
57 rpcrdma_noch = 0,
58 rpcrdma_readch,
59 rpcrdma_areadch,
60 rpcrdma_writech,
61 rpcrdma_replych
62};
63
64#ifdef RPC_DEBUG 56#ifdef RPC_DEBUG
65static const char transfertypes[][12] = { 57static const char transfertypes[][12] = {
66 "pure inline", /* no chunks */ 58 "pure inline", /* no chunks */
@@ -279,13 +271,37 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
279 return (unsigned char *)iptr - (unsigned char *)headerp; 271 return (unsigned char *)iptr - (unsigned char *)headerp;
280 272
281out: 273out:
282 for (pos = 0; nchunks--;) 274 if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
283 pos += rpcrdma_deregister_external( 275 for (pos = 0; nchunks--;)
284 &req->rl_segments[pos], r_xprt); 276 pos += rpcrdma_deregister_external(
277 &req->rl_segments[pos], r_xprt);
278 }
285 return n; 279 return n;
286} 280}
287 281
288/* 282/*
283 * Marshal chunks. This routine returns the header length
284 * consumed by marshaling.
285 *
286 * Returns positive RPC/RDMA header size, or negative errno.
287 */
288
289ssize_t
290rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
291{
292 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
293 struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;
294
295 if (req->rl_rtype != rpcrdma_noch)
296 result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
297 headerp, req->rl_rtype);
298 else if (req->rl_wtype != rpcrdma_noch)
299 result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
300 headerp, req->rl_wtype);
301 return result;
302}
303
304/*
289 * Copy write data inline. 305 * Copy write data inline.
290 * This function is used for "small" requests. Data which is passed 306 * This function is used for "small" requests. Data which is passed
291 * to RPC via iovecs (or page list) is copied directly into the 307 * to RPC via iovecs (or page list) is copied directly into the
@@ -377,7 +393,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
377 char *base; 393 char *base;
378 size_t rpclen, padlen; 394 size_t rpclen, padlen;
379 ssize_t hdrlen; 395 ssize_t hdrlen;
380 enum rpcrdma_chunktype rtype, wtype;
381 struct rpcrdma_msg *headerp; 396 struct rpcrdma_msg *headerp;
382 397
383 /* 398 /*
@@ -415,13 +430,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
415 * into pages; otherwise use reply chunks. 430 * into pages; otherwise use reply chunks.
416 */ 431 */
417 if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) 432 if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
418 wtype = rpcrdma_noch; 433 req->rl_wtype = rpcrdma_noch;
419 else if (rqst->rq_rcv_buf.page_len == 0) 434 else if (rqst->rq_rcv_buf.page_len == 0)
420 wtype = rpcrdma_replych; 435 req->rl_wtype = rpcrdma_replych;
421 else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) 436 else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
422 wtype = rpcrdma_writech; 437 req->rl_wtype = rpcrdma_writech;
423 else 438 else
424 wtype = rpcrdma_replych; 439 req->rl_wtype = rpcrdma_replych;
425 440
426 /* 441 /*
427 * Chunks needed for arguments? 442 * Chunks needed for arguments?
@@ -438,16 +453,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
438 * TBD check NFSv4 setacl 453 * TBD check NFSv4 setacl
439 */ 454 */
440 if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) 455 if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
441 rtype = rpcrdma_noch; 456 req->rl_rtype = rpcrdma_noch;
442 else if (rqst->rq_snd_buf.page_len == 0) 457 else if (rqst->rq_snd_buf.page_len == 0)
443 rtype = rpcrdma_areadch; 458 req->rl_rtype = rpcrdma_areadch;
444 else 459 else
445 rtype = rpcrdma_readch; 460 req->rl_rtype = rpcrdma_readch;
446 461
447 /* The following simplification is not true forever */ 462 /* The following simplification is not true forever */
448 if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) 463 if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
449 wtype = rpcrdma_noch; 464 req->rl_wtype = rpcrdma_noch;
450 if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { 465 if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
451 dprintk("RPC: %s: cannot marshal multiple chunk lists\n", 466 dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
452 __func__); 467 __func__);
453 return -EIO; 468 return -EIO;
@@ -461,7 +476,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
461 * When padding is in use and applies to the transfer, insert 476 * When padding is in use and applies to the transfer, insert
462 * it and change the message type. 477 * it and change the message type.
463 */ 478 */
464 if (rtype == rpcrdma_noch) { 479 if (req->rl_rtype == rpcrdma_noch) {
465 480
466 padlen = rpcrdma_inline_pullup(rqst, 481 padlen = rpcrdma_inline_pullup(rqst,
467 RPCRDMA_INLINE_PAD_VALUE(rqst)); 482 RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -476,7 +491,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
476 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; 491 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
477 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; 492 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
478 hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ 493 hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
479 if (wtype != rpcrdma_noch) { 494 if (req->rl_wtype != rpcrdma_noch) {
480 dprintk("RPC: %s: invalid chunk list\n", 495 dprintk("RPC: %s: invalid chunk list\n",
481 __func__); 496 __func__);
482 return -EIO; 497 return -EIO;
@@ -497,30 +512,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
497 * on receive. Therefore, we request a reply chunk 512 * on receive. Therefore, we request a reply chunk
498 * for non-writes wherever feasible and efficient. 513 * for non-writes wherever feasible and efficient.
499 */ 514 */
500 if (wtype == rpcrdma_noch) 515 if (req->rl_wtype == rpcrdma_noch)
501 wtype = rpcrdma_replych; 516 req->rl_wtype = rpcrdma_replych;
502 } 517 }
503 } 518 }
504 519
505 /* 520 hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
506 * Marshal chunks. This routine will return the header length
507 * consumed by marshaling.
508 */
509 if (rtype != rpcrdma_noch) {
510 hdrlen = rpcrdma_create_chunks(rqst,
511 &rqst->rq_snd_buf, headerp, rtype);
512 wtype = rtype; /* simplify dprintk */
513
514 } else if (wtype != rpcrdma_noch) {
515 hdrlen = rpcrdma_create_chunks(rqst,
516 &rqst->rq_rcv_buf, headerp, wtype);
517 }
518 if (hdrlen < 0) 521 if (hdrlen < 0)
519 return hdrlen; 522 return hdrlen;
520 523
521 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" 524 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
522 " headerp 0x%p base 0x%p lkey 0x%x\n", 525 " headerp 0x%p base 0x%p lkey 0x%x\n",
523 __func__, transfertypes[wtype], hdrlen, rpclen, padlen, 526 __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
524 headerp, base, req->rl_iov.lkey); 527 headerp, base, req->rl_iov.lkey);
525 528
526 /* 529 /*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 66f91f0d071a..2faac4940563 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -296,7 +296,6 @@ xprt_setup_rdma(struct xprt_create *args)
296 296
297 xprt->resvport = 0; /* privileged port not needed */ 297 xprt->resvport = 0; /* privileged port not needed */
298 xprt->tsh_size = 0; /* RPC-RDMA handles framing */ 298 xprt->tsh_size = 0; /* RPC-RDMA handles framing */
299 xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
300 xprt->ops = &xprt_rdma_procs; 299 xprt->ops = &xprt_rdma_procs;
301 300
302 /* 301 /*
@@ -382,6 +381,9 @@ xprt_setup_rdma(struct xprt_create *args)
382 new_ep->rep_xprt = xprt; 381 new_ep->rep_xprt = xprt;
383 382
384 xprt_rdma_format_addresses(xprt); 383 xprt_rdma_format_addresses(xprt);
384 xprt->max_payload = rpcrdma_max_payload(new_xprt);
385 dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
386 __func__, xprt->max_payload);
385 387
386 if (!try_module_get(THIS_MODULE)) 388 if (!try_module_get(THIS_MODULE))
387 goto out4; 389 goto out4;
@@ -412,7 +414,7 @@ xprt_rdma_close(struct rpc_xprt *xprt)
412 if (r_xprt->rx_ep.rep_connected > 0) 414 if (r_xprt->rx_ep.rep_connected > 0)
413 xprt->reestablish_timeout = 0; 415 xprt->reestablish_timeout = 0;
414 xprt_disconnect_done(xprt); 416 xprt_disconnect_done(xprt);
415 (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); 417 rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
416} 418}
417 419
418static void 420static void
@@ -595,13 +597,14 @@ xprt_rdma_send_request(struct rpc_task *task)
595 struct rpc_xprt *xprt = rqst->rq_xprt; 597 struct rpc_xprt *xprt = rqst->rq_xprt;
596 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 598 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
597 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 599 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
598 int rc; 600 int rc = 0;
599 601
600 if (req->rl_niovs == 0) { 602 if (req->rl_niovs == 0)
601 rc = rpcrdma_marshal_req(rqst); 603 rc = rpcrdma_marshal_req(rqst);
602 if (rc < 0) 604 else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
603 goto failed_marshal; 605 rc = rpcrdma_marshal_chunks(rqst, 0);
604 } 606 if (rc < 0)
607 goto failed_marshal;
605 608
606 if (req->rl_reply == NULL) /* e.g. reconnection */ 609 if (req->rl_reply == NULL) /* e.g. reconnection */
607 rpcrdma_recv_buffer_get(req); 610 rpcrdma_recv_buffer_get(req);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 13dbd1c389ff..61c41298b4ea 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -61,6 +61,8 @@
61# define RPCDBG_FACILITY RPCDBG_TRANS 61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif 62#endif
63 63
64static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
65
64/* 66/*
65 * internal functions 67 * internal functions
66 */ 68 */
@@ -103,17 +105,6 @@ rpcrdma_run_tasklet(unsigned long data)
103 105
104static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); 106static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
105 107
106static inline void
107rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
108{
109 unsigned long flags;
110
111 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
112 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
113 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
114 tasklet_schedule(&rpcrdma_tasklet_g);
115}
116
117static void 108static void
118rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) 109rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
119{ 110{
@@ -153,12 +144,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
153 if (wc->wr_id == 0ULL) 144 if (wc->wr_id == 0ULL)
154 return; 145 return;
155 if (wc->status != IB_WC_SUCCESS) 146 if (wc->status != IB_WC_SUCCESS)
156 return; 147 frmr->r.frmr.fr_state = FRMR_IS_STALE;
157
158 if (wc->opcode == IB_WC_FAST_REG_MR)
159 frmr->r.frmr.state = FRMR_IS_VALID;
160 else if (wc->opcode == IB_WC_LOCAL_INV)
161 frmr->r.frmr.state = FRMR_IS_INVALID;
162} 148}
163 149
164static int 150static int
@@ -217,7 +203,7 @@ rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
217} 203}
218 204
219static void 205static void
220rpcrdma_recvcq_process_wc(struct ib_wc *wc) 206rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
221{ 207{
222 struct rpcrdma_rep *rep = 208 struct rpcrdma_rep *rep =
223 (struct rpcrdma_rep *)(unsigned long)wc->wr_id; 209 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
@@ -248,28 +234,38 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc)
248 } 234 }
249 235
250out_schedule: 236out_schedule:
251 rpcrdma_schedule_tasklet(rep); 237 list_add_tail(&rep->rr_list, sched_list);
252} 238}
253 239
254static int 240static int
255rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) 241rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
256{ 242{
243 struct list_head sched_list;
257 struct ib_wc *wcs; 244 struct ib_wc *wcs;
258 int budget, count, rc; 245 int budget, count, rc;
246 unsigned long flags;
259 247
248 INIT_LIST_HEAD(&sched_list);
260 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; 249 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
261 do { 250 do {
262 wcs = ep->rep_recv_wcs; 251 wcs = ep->rep_recv_wcs;
263 252
264 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); 253 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
265 if (rc <= 0) 254 if (rc <= 0)
266 return rc; 255 goto out_schedule;
267 256
268 count = rc; 257 count = rc;
269 while (count-- > 0) 258 while (count-- > 0)
270 rpcrdma_recvcq_process_wc(wcs++); 259 rpcrdma_recvcq_process_wc(wcs++, &sched_list);
271 } while (rc == RPCRDMA_POLLSIZE && --budget); 260 } while (rc == RPCRDMA_POLLSIZE && --budget);
272 return 0; 261 rc = 0;
262
263out_schedule:
264 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
265 list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
266 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
267 tasklet_schedule(&rpcrdma_tasklet_g);
268 return rc;
273} 269}
274 270
275/* 271/*
@@ -310,6 +306,13 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
310 rpcrdma_recvcq_poll(cq, ep); 306 rpcrdma_recvcq_poll(cq, ep);
311} 307}
312 308
309static void
310rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
311{
312 rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
313 rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
314}
315
313#ifdef RPC_DEBUG 316#ifdef RPC_DEBUG
314static const char * const conn[] = { 317static const char * const conn[] = {
315 "address resolved", 318 "address resolved",
@@ -323,8 +326,16 @@ static const char * const conn[] = {
323 "rejected", 326 "rejected",
324 "established", 327 "established",
325 "disconnected", 328 "disconnected",
326 "device removal" 329 "device removal",
330 "multicast join",
331 "multicast error",
332 "address change",
333 "timewait exit",
327}; 334};
335
336#define CONNECTION_MSG(status) \
337 ((status) < ARRAY_SIZE(conn) ? \
338 conn[(status)] : "unrecognized connection error")
328#endif 339#endif
329 340
330static int 341static int
@@ -382,23 +393,18 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
382 case RDMA_CM_EVENT_DEVICE_REMOVAL: 393 case RDMA_CM_EVENT_DEVICE_REMOVAL:
383 connstate = -ENODEV; 394 connstate = -ENODEV;
384connected: 395connected:
385 dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
386 __func__,
387 (event->event <= 11) ? conn[event->event] :
388 "unknown connection error",
389 &addr->sin_addr.s_addr,
390 ntohs(addr->sin_port),
391 ep, event->event);
392 atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); 396 atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
393 dprintk("RPC: %s: %sconnected\n", 397 dprintk("RPC: %s: %sconnected\n",
394 __func__, connstate > 0 ? "" : "dis"); 398 __func__, connstate > 0 ? "" : "dis");
395 ep->rep_connected = connstate; 399 ep->rep_connected = connstate;
396 ep->rep_func(ep); 400 ep->rep_func(ep);
397 wake_up_all(&ep->rep_connect_wait); 401 wake_up_all(&ep->rep_connect_wait);
398 break; 402 /*FALLTHROUGH*/
399 default: 403 default:
400 dprintk("RPC: %s: unexpected CM event %d\n", 404 dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
401 __func__, event->event); 405 __func__, &addr->sin_addr.s_addr,
406 ntohs(addr->sin_port), ep,
407 CONNECTION_MSG(event->event));
402 break; 408 break;
403 } 409 }
404 410
@@ -558,12 +564,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
558 if (!ia->ri_id->device->alloc_fmr) { 564 if (!ia->ri_id->device->alloc_fmr) {
559 dprintk("RPC: %s: MTHCAFMR registration " 565 dprintk("RPC: %s: MTHCAFMR registration "
560 "not supported by HCA\n", __func__); 566 "not supported by HCA\n", __func__);
561#if RPCRDMA_PERSISTENT_REGISTRATION
562 memreg = RPCRDMA_ALLPHYSICAL; 567 memreg = RPCRDMA_ALLPHYSICAL;
563#else
564 rc = -ENOMEM;
565 goto out2;
566#endif
567 } 568 }
568 } 569 }
569 570
@@ -578,20 +579,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
578 switch (memreg) { 579 switch (memreg) {
579 case RPCRDMA_FRMR: 580 case RPCRDMA_FRMR:
580 break; 581 break;
581#if RPCRDMA_PERSISTENT_REGISTRATION
582 case RPCRDMA_ALLPHYSICAL: 582 case RPCRDMA_ALLPHYSICAL:
583 mem_priv = IB_ACCESS_LOCAL_WRITE | 583 mem_priv = IB_ACCESS_LOCAL_WRITE |
584 IB_ACCESS_REMOTE_WRITE | 584 IB_ACCESS_REMOTE_WRITE |
585 IB_ACCESS_REMOTE_READ; 585 IB_ACCESS_REMOTE_READ;
586 goto register_setup; 586 goto register_setup;
587#endif
588 case RPCRDMA_MTHCAFMR: 587 case RPCRDMA_MTHCAFMR:
589 if (ia->ri_have_dma_lkey) 588 if (ia->ri_have_dma_lkey)
590 break; 589 break;
591 mem_priv = IB_ACCESS_LOCAL_WRITE; 590 mem_priv = IB_ACCESS_LOCAL_WRITE;
592#if RPCRDMA_PERSISTENT_REGISTRATION
593 register_setup: 591 register_setup:
594#endif
595 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); 592 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
596 if (IS_ERR(ia->ri_bind_mem)) { 593 if (IS_ERR(ia->ri_bind_mem)) {
597 printk(KERN_ALERT "%s: ib_get_dma_mr for " 594 printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -613,6 +610,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
613 /* Else will do memory reg/dereg for each chunk */ 610 /* Else will do memory reg/dereg for each chunk */
614 ia->ri_memreg_strategy = memreg; 611 ia->ri_memreg_strategy = memreg;
615 612
613 rwlock_init(&ia->ri_qplock);
616 return 0; 614 return 0;
617out2: 615out2:
618 rdma_destroy_id(ia->ri_id); 616 rdma_destroy_id(ia->ri_id);
@@ -826,10 +824,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
826 cancel_delayed_work_sync(&ep->rep_connect_worker); 824 cancel_delayed_work_sync(&ep->rep_connect_worker);
827 825
828 if (ia->ri_id->qp) { 826 if (ia->ri_id->qp) {
829 rc = rpcrdma_ep_disconnect(ep, ia); 827 rpcrdma_ep_disconnect(ep, ia);
830 if (rc)
831 dprintk("RPC: %s: rpcrdma_ep_disconnect"
832 " returned %i\n", __func__, rc);
833 rdma_destroy_qp(ia->ri_id); 828 rdma_destroy_qp(ia->ri_id);
834 ia->ri_id->qp = NULL; 829 ia->ri_id->qp = NULL;
835 } 830 }
@@ -859,7 +854,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
859int 854int
860rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 855rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
861{ 856{
862 struct rdma_cm_id *id; 857 struct rdma_cm_id *id, *old;
863 int rc = 0; 858 int rc = 0;
864 int retry_count = 0; 859 int retry_count = 0;
865 860
@@ -867,13 +862,12 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
867 struct rpcrdma_xprt *xprt; 862 struct rpcrdma_xprt *xprt;
868retry: 863retry:
869 dprintk("RPC: %s: reconnecting...\n", __func__); 864 dprintk("RPC: %s: reconnecting...\n", __func__);
870 rc = rpcrdma_ep_disconnect(ep, ia);
871 if (rc && rc != -ENOTCONN)
872 dprintk("RPC: %s: rpcrdma_ep_disconnect"
873 " status %i\n", __func__, rc);
874 865
875 rpcrdma_clean_cq(ep->rep_attr.recv_cq); 866 rpcrdma_ep_disconnect(ep, ia);
876 rpcrdma_clean_cq(ep->rep_attr.send_cq); 867 rpcrdma_flush_cqs(ep);
868
869 if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
870 rpcrdma_reset_frmrs(ia);
877 871
878 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); 872 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
879 id = rpcrdma_create_id(xprt, ia, 873 id = rpcrdma_create_id(xprt, ia,
@@ -905,9 +899,14 @@ retry:
905 rc = -ENETUNREACH; 899 rc = -ENETUNREACH;
906 goto out; 900 goto out;
907 } 901 }
908 rdma_destroy_qp(ia->ri_id); 902
909 rdma_destroy_id(ia->ri_id); 903 write_lock(&ia->ri_qplock);
904 old = ia->ri_id;
910 ia->ri_id = id; 905 ia->ri_id = id;
906 write_unlock(&ia->ri_qplock);
907
908 rdma_destroy_qp(old);
909 rdma_destroy_id(old);
911 } else { 910 } else {
912 dprintk("RPC: %s: connecting...\n", __func__); 911 dprintk("RPC: %s: connecting...\n", __func__);
913 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); 912 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
@@ -974,13 +973,12 @@ out:
974 * This call is not reentrant, and must not be made in parallel 973 * This call is not reentrant, and must not be made in parallel
975 * on the same endpoint. 974 * on the same endpoint.
976 */ 975 */
977int 976void
978rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 977rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
979{ 978{
980 int rc; 979 int rc;
981 980
982 rpcrdma_clean_cq(ep->rep_attr.recv_cq); 981 rpcrdma_flush_cqs(ep);
983 rpcrdma_clean_cq(ep->rep_attr.send_cq);
984 rc = rdma_disconnect(ia->ri_id); 982 rc = rdma_disconnect(ia->ri_id);
985 if (!rc) { 983 if (!rc) {
986 /* returns without wait if not connected */ 984 /* returns without wait if not connected */
@@ -992,12 +990,93 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
992 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); 990 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
993 ep->rep_connected = rc; 991 ep->rep_connected = rc;
994 } 992 }
993}
994
995static int
996rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
997{
998 int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
999 struct ib_fmr_attr fmr_attr = {
1000 .max_pages = RPCRDMA_MAX_DATA_SEGS,
1001 .max_maps = 1,
1002 .page_shift = PAGE_SHIFT
1003 };
1004 struct rpcrdma_mw *r;
1005 int i, rc;
1006
1007 i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
1008 dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
1009
1010 while (i--) {
1011 r = kzalloc(sizeof(*r), GFP_KERNEL);
1012 if (r == NULL)
1013 return -ENOMEM;
1014
1015 r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
1016 if (IS_ERR(r->r.fmr)) {
1017 rc = PTR_ERR(r->r.fmr);
1018 dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
1019 __func__, rc);
1020 goto out_free;
1021 }
1022
1023 list_add(&r->mw_list, &buf->rb_mws);
1024 list_add(&r->mw_all, &buf->rb_all);
1025 }
1026 return 0;
1027
1028out_free:
1029 kfree(r);
1030 return rc;
1031}
1032
1033static int
1034rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
1035{
1036 struct rpcrdma_frmr *f;
1037 struct rpcrdma_mw *r;
1038 int i, rc;
1039
1040 i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
1041 dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
1042
1043 while (i--) {
1044 r = kzalloc(sizeof(*r), GFP_KERNEL);
1045 if (r == NULL)
1046 return -ENOMEM;
1047 f = &r->r.frmr;
1048
1049 f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1050 ia->ri_max_frmr_depth);
1051 if (IS_ERR(f->fr_mr)) {
1052 rc = PTR_ERR(f->fr_mr);
1053 dprintk("RPC: %s: ib_alloc_fast_reg_mr "
1054 "failed %i\n", __func__, rc);
1055 goto out_free;
1056 }
1057
1058 f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
1059 ia->ri_max_frmr_depth);
1060 if (IS_ERR(f->fr_pgl)) {
1061 rc = PTR_ERR(f->fr_pgl);
1062 dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
1063 "failed %i\n", __func__, rc);
1064
1065 ib_dereg_mr(f->fr_mr);
1066 goto out_free;
1067 }
1068
1069 list_add(&r->mw_list, &buf->rb_mws);
1070 list_add(&r->mw_all, &buf->rb_all);
1071 }
1072
1073 return 0;
1074
1075out_free:
1076 kfree(r);
995 return rc; 1077 return rc;
996} 1078}
997 1079
998/*
999 * Initialize buffer memory
1000 */
1001int 1080int
1002rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, 1081rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1003 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) 1082 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
@@ -1005,7 +1084,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1005 char *p; 1084 char *p;
1006 size_t len, rlen, wlen; 1085 size_t len, rlen, wlen;
1007 int i, rc; 1086 int i, rc;
1008 struct rpcrdma_mw *r;
1009 1087
1010 buf->rb_max_requests = cdata->max_requests; 1088 buf->rb_max_requests = cdata->max_requests;
1011 spin_lock_init(&buf->rb_lock); 1089 spin_lock_init(&buf->rb_lock);
@@ -1016,28 +1094,12 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1016 * 2. arrays of struct rpcrdma_req to fill in pointers 1094 * 2. arrays of struct rpcrdma_req to fill in pointers
1017 * 3. array of struct rpcrdma_rep for replies 1095 * 3. array of struct rpcrdma_rep for replies
1018 * 4. padding, if any 1096 * 4. padding, if any
1019 * 5. mw's, fmr's or frmr's, if any
1020 * Send/recv buffers in req/rep need to be registered 1097 * Send/recv buffers in req/rep need to be registered
1021 */ 1098 */
1022
1023 len = buf->rb_max_requests * 1099 len = buf->rb_max_requests *
1024 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); 1100 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1025 len += cdata->padding; 1101 len += cdata->padding;
1026 switch (ia->ri_memreg_strategy) {
1027 case RPCRDMA_FRMR:
1028 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
1029 sizeof(struct rpcrdma_mw);
1030 break;
1031 case RPCRDMA_MTHCAFMR:
1032 /* TBD we are perhaps overallocating here */
1033 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
1034 sizeof(struct rpcrdma_mw);
1035 break;
1036 default:
1037 break;
1038 }
1039 1102
1040 /* allocate 1, 4 and 5 in one shot */
1041 p = kzalloc(len, GFP_KERNEL); 1103 p = kzalloc(len, GFP_KERNEL);
1042 if (p == NULL) { 1104 if (p == NULL) {
1043 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", 1105 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
@@ -1064,51 +1126,17 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1064 p += cdata->padding; 1126 p += cdata->padding;
1065 1127
1066 INIT_LIST_HEAD(&buf->rb_mws); 1128 INIT_LIST_HEAD(&buf->rb_mws);
1067 r = (struct rpcrdma_mw *)p; 1129 INIT_LIST_HEAD(&buf->rb_all);
1068 switch (ia->ri_memreg_strategy) { 1130 switch (ia->ri_memreg_strategy) {
1069 case RPCRDMA_FRMR: 1131 case RPCRDMA_FRMR:
1070 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { 1132 rc = rpcrdma_init_frmrs(ia, buf);
1071 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, 1133 if (rc)
1072 ia->ri_max_frmr_depth); 1134 goto out;
1073 if (IS_ERR(r->r.frmr.fr_mr)) {
1074 rc = PTR_ERR(r->r.frmr.fr_mr);
1075 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1076 " failed %i\n", __func__, rc);
1077 goto out;
1078 }
1079 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1080 ia->ri_id->device,
1081 ia->ri_max_frmr_depth);
1082 if (IS_ERR(r->r.frmr.fr_pgl)) {
1083 rc = PTR_ERR(r->r.frmr.fr_pgl);
1084 dprintk("RPC: %s: "
1085 "ib_alloc_fast_reg_page_list "
1086 "failed %i\n", __func__, rc);
1087
1088 ib_dereg_mr(r->r.frmr.fr_mr);
1089 goto out;
1090 }
1091 list_add(&r->mw_list, &buf->rb_mws);
1092 ++r;
1093 }
1094 break; 1135 break;
1095 case RPCRDMA_MTHCAFMR: 1136 case RPCRDMA_MTHCAFMR:
1096 /* TBD we are perhaps overallocating here */ 1137 rc = rpcrdma_init_fmrs(ia, buf);
1097 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1138 if (rc)
1098 static struct ib_fmr_attr fa = 1139 goto out;
1099 { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
1100 r->r.fmr = ib_alloc_fmr(ia->ri_pd,
1101 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
1102 &fa);
1103 if (IS_ERR(r->r.fmr)) {
1104 rc = PTR_ERR(r->r.fmr);
1105 dprintk("RPC: %s: ib_alloc_fmr"
1106 " failed %i\n", __func__, rc);
1107 goto out;
1108 }
1109 list_add(&r->mw_list, &buf->rb_mws);
1110 ++r;
1111 }
1112 break; 1140 break;
1113 default: 1141 default:
1114 break; 1142 break;
@@ -1176,24 +1204,57 @@ out:
1176 return rc; 1204 return rc;
1177} 1205}
1178 1206
1179/* 1207static void
1180 * Unregister and destroy buffer memory. Need to deal with 1208rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1181 * partial initialization, so it's callable from failed create. 1209{
1182 * Must be called before destroying endpoint, as registrations 1210 struct rpcrdma_mw *r;
1183 * reference it. 1211 int rc;
1184 */ 1212
1213 while (!list_empty(&buf->rb_all)) {
1214 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1215 list_del(&r->mw_all);
1216 list_del(&r->mw_list);
1217
1218 rc = ib_dealloc_fmr(r->r.fmr);
1219 if (rc)
1220 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
1221 __func__, rc);
1222
1223 kfree(r);
1224 }
1225}
1226
1227static void
1228rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
1229{
1230 struct rpcrdma_mw *r;
1231 int rc;
1232
1233 while (!list_empty(&buf->rb_all)) {
1234 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1235 list_del(&r->mw_all);
1236 list_del(&r->mw_list);
1237
1238 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1239 if (rc)
1240 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1241 __func__, rc);
1242 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1243
1244 kfree(r);
1245 }
1246}
1247
1185void 1248void
1186rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1249rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1187{ 1250{
1188 int rc, i;
1189 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1251 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1190 struct rpcrdma_mw *r; 1252 int i;
1191 1253
1192 /* clean up in reverse order from create 1254 /* clean up in reverse order from create
1193 * 1. recv mr memory (mr free, then kfree) 1255 * 1. recv mr memory (mr free, then kfree)
1194 * 2. send mr memory (mr free, then kfree) 1256 * 2. send mr memory (mr free, then kfree)
1195 * 3. padding (if any) [moved to rpcrdma_ep_destroy] 1257 * 3. MWs
1196 * 4. arrays
1197 */ 1258 */
1198 dprintk("RPC: %s: entering\n", __func__); 1259 dprintk("RPC: %s: entering\n", __func__);
1199 1260
@@ -1212,34 +1273,217 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1212 } 1273 }
1213 } 1274 }
1214 1275
1276 switch (ia->ri_memreg_strategy) {
1277 case RPCRDMA_FRMR:
1278 rpcrdma_destroy_frmrs(buf);
1279 break;
1280 case RPCRDMA_MTHCAFMR:
1281 rpcrdma_destroy_fmrs(buf);
1282 break;
1283 default:
1284 break;
1285 }
1286
1287 kfree(buf->rb_pool);
1288}
1289
1290/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
1291 * an unusable state. Find FRMRs in this state and dereg / reg
1292 * each. FRMRs that are VALID and attached to an rpcrdma_req are
1293 * also torn down.
1294 *
1295 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
1296 *
1297 * This is invoked only in the transport connect worker in order
1298 * to serialize with rpcrdma_register_frmr_external().
1299 */
1300static void
1301rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
1302{
1303 struct rpcrdma_xprt *r_xprt =
1304 container_of(ia, struct rpcrdma_xprt, rx_ia);
1305 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1306 struct list_head *pos;
1307 struct rpcrdma_mw *r;
1308 int rc;
1309
1310 list_for_each(pos, &buf->rb_all) {
1311 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1312
1313 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
1314 continue;
1315
1316 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1317 if (rc)
1318 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1319 __func__, rc);
1320 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1321
1322 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1323 ia->ri_max_frmr_depth);
1324 if (IS_ERR(r->r.frmr.fr_mr)) {
1325 rc = PTR_ERR(r->r.frmr.fr_mr);
1326 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1327 " failed %i\n", __func__, rc);
1328 continue;
1329 }
1330 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1331 ia->ri_id->device,
1332 ia->ri_max_frmr_depth);
1333 if (IS_ERR(r->r.frmr.fr_pgl)) {
1334 rc = PTR_ERR(r->r.frmr.fr_pgl);
1335 dprintk("RPC: %s: "
1336 "ib_alloc_fast_reg_page_list "
1337 "failed %i\n", __func__, rc);
1338
1339 ib_dereg_mr(r->r.frmr.fr_mr);
1340 continue;
1341 }
1342 r->r.frmr.fr_state = FRMR_IS_INVALID;
1343 }
1344}
1345
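Taken together with the comment above, rpcrdma_reset_frmrs() can be read as driving a three-state machine over fr_state: INVALID means free, VALID means registered and in use, STALE means a completion was flushed and the MR must be rebuilt. A hedged userspace sketch of those transitions (illustrative only; the real enum appears in xprt_rdma.h later in this patch):

	#include <stdio.h>

	enum frmr_state { FRMR_IS_INVALID, FRMR_IS_VALID, FRMR_IS_STALE };

	static const char *frmr_next(enum frmr_state s)
	{
		switch (s) {
		case FRMR_IS_INVALID:	/* free */
			return "FAST_REG_MR posts -> FRMR_IS_VALID";
		case FRMR_IS_VALID:	/* registered, in use */
			return "LOCAL_INV completes -> FRMR_IS_INVALID";
		case FRMR_IS_STALE:	/* flushed completion */
			return "dereg + realloc (reset) -> FRMR_IS_INVALID";
		}
		return "?";
	}

	int main(void)
	{
		printf("stale: %s\n", frmr_next(FRMR_IS_STALE));
		return 0;
	}
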
1346/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1347 * some req segments uninitialized.
1348 */
1349static void
1350rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1351{
1352 if (*mw) {
1353 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1354 *mw = NULL;
1355 }
1356}
1357
1358/* Cycle mw's back in reverse order, and "spin" them.
1359 * This delays and scrambles reuse as much as possible.
1360 */
1361static void
1362rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1363{
1364 struct rpcrdma_mr_seg *seg = req->rl_segments;
1365 struct rpcrdma_mr_seg *seg1 = seg;
1366 int i;
1367
1368 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
1369 rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
1370 rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
1371}
1372
1373static void
1374rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1375{
1376 buf->rb_send_bufs[--buf->rb_send_index] = req;
1377 req->rl_niovs = 0;
1378 if (req->rl_reply) {
1379 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1380 req->rl_reply->rr_func = NULL;
1381 req->rl_reply = NULL;
1382 }
1383}
1384
1385/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
1386 * Redo only the ib_post_send().
1387 */
1388static void
1389rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1390{
1391 struct rpcrdma_xprt *r_xprt =
1392 container_of(ia, struct rpcrdma_xprt, rx_ia);
1393 struct ib_send_wr invalidate_wr, *bad_wr;
1394 int rc;
1395
1396 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1397
1398 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
1399 r->r.frmr.fr_state = FRMR_IS_INVALID;
1400
1401 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1402 invalidate_wr.wr_id = (unsigned long)(void *)r;
1403 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1404 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1405 DECR_CQCOUNT(&r_xprt->rx_ep);
1406
1407 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1408 __func__, r, r->r.frmr.fr_mr->rkey);
1409
1410 read_lock(&ia->ri_qplock);
1411 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1412 read_unlock(&ia->ri_qplock);
1413 if (rc) {
1414 /* Force rpcrdma_buffer_get() to retry */
1415 r->r.frmr.fr_state = FRMR_IS_STALE;
1416 dprintk("RPC: %s: ib_post_send failed, %i\n",
1417 __func__, rc);
1418 }
1419}
1420
1421static void
1422rpcrdma_retry_flushed_linv(struct list_head *stale,
1423 struct rpcrdma_buffer *buf)
1424{
1425 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1426 struct list_head *pos;
1427 struct rpcrdma_mw *r;
1428 unsigned long flags;
1429
1430 list_for_each(pos, stale) {
1431 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1432 rpcrdma_retry_local_inv(r, ia);
1433 }
1434
1435 spin_lock_irqsave(&buf->rb_lock, flags);
1436 list_splice_tail(stale, &buf->rb_mws);
1437 spin_unlock_irqrestore(&buf->rb_lock, flags);
1438}
1439
1440static struct rpcrdma_req *
1441rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1442 struct list_head *stale)
1443{
1444 struct rpcrdma_mw *r;
1445 int i;
1446
1447 i = RPCRDMA_MAX_SEGS - 1;
1215 while (!list_empty(&buf->rb_mws)) { 1448 while (!list_empty(&buf->rb_mws)) {
1216 r = list_entry(buf->rb_mws.next, 1449 r = list_entry(buf->rb_mws.next,
1217 struct rpcrdma_mw, mw_list); 1450 struct rpcrdma_mw, mw_list);
1218 list_del(&r->mw_list); 1451 list_del(&r->mw_list);
1219 switch (ia->ri_memreg_strategy) { 1452 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1220 case RPCRDMA_FRMR: 1453 list_add(&r->mw_list, stale);
1221 rc = ib_dereg_mr(r->r.frmr.fr_mr); 1454 continue;
1222 if (rc)
1223 dprintk("RPC: %s:"
1224 " ib_dereg_mr"
1225 " failed %i\n",
1226 __func__, rc);
1227 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1228 break;
1229 case RPCRDMA_MTHCAFMR:
1230 rc = ib_dealloc_fmr(r->r.fmr);
1231 if (rc)
1232 dprintk("RPC: %s:"
1233 " ib_dealloc_fmr"
1234 " failed %i\n",
1235 __func__, rc);
1236 break;
1237 default:
1238 break;
1239 } 1455 }
1456 req->rl_segments[i].mr_chunk.rl_mw = r;
1457 if (unlikely(i-- == 0))
1458 return req; /* Success */
1240 } 1459 }
1241 1460
1242 kfree(buf->rb_pool); 1461 /* Not enough entries on rb_mws for this req */
1462 rpcrdma_buffer_put_sendbuf(req, buf);
1463 rpcrdma_buffer_put_mrs(req, buf);
1464 return NULL;
1465}
1466
1467static struct rpcrdma_req *
1468rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1469{
1470 struct rpcrdma_mw *r;
1471 int i;
1472
1473 i = RPCRDMA_MAX_SEGS - 1;
1474 while (!list_empty(&buf->rb_mws)) {
1475 r = list_entry(buf->rb_mws.next,
1476 struct rpcrdma_mw, mw_list);
1477 list_del(&r->mw_list);
1478 req->rl_segments[i].mr_chunk.rl_mw = r;
1479 if (unlikely(i-- == 0))
1480 return req; /* Success */
1481 }
1482
1483 /* Not enough entries on rb_mws for this req */
1484 rpcrdma_buffer_put_sendbuf(req, buf);
1485 rpcrdma_buffer_put_mrs(req, buf);
1486 return NULL;
1243} 1487}
1244 1488
1245/* 1489/*
@@ -1254,10 +1498,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1254struct rpcrdma_req * 1498struct rpcrdma_req *
1255rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1499rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1256{ 1500{
1501 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1502 struct list_head stale;
1257 struct rpcrdma_req *req; 1503 struct rpcrdma_req *req;
1258 unsigned long flags; 1504 unsigned long flags;
1259 int i;
1260 struct rpcrdma_mw *r;
1261 1505
1262 spin_lock_irqsave(&buffers->rb_lock, flags); 1506 spin_lock_irqsave(&buffers->rb_lock, flags);
1263 if (buffers->rb_send_index == buffers->rb_max_requests) { 1507 if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1277,16 +1521,21 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1277 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; 1521 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1278 } 1522 }
1279 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; 1523 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1280 if (!list_empty(&buffers->rb_mws)) { 1524
1281 i = RPCRDMA_MAX_SEGS - 1; 1525 INIT_LIST_HEAD(&stale);
1282 do { 1526 switch (ia->ri_memreg_strategy) {
1283 r = list_entry(buffers->rb_mws.next, 1527 case RPCRDMA_FRMR:
1284 struct rpcrdma_mw, mw_list); 1528 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1285 list_del(&r->mw_list); 1529 break;
1286 req->rl_segments[i].mr_chunk.rl_mw = r; 1530 case RPCRDMA_MTHCAFMR:
1287 } while (--i >= 0); 1531 req = rpcrdma_buffer_get_fmrs(req, buffers);
1532 break;
1533 default:
1534 break;
1288 } 1535 }
1289 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1536 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1537 if (!list_empty(&stale))
1538 rpcrdma_retry_flushed_linv(&stale, buffers);
1290 return req; 1539 return req;
1291} 1540}
1292 1541
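Note the lock discipline in rpcrdma_buffer_get(): stale FRMRs are spliced to a private list while rb_lock is held, and the LOCAL_INV recovery runs only after the spinlock is released, presumably to keep the send posting and its ri_qplock out of the rb_lock critical section. A userspace sketch of the same collect-under-lock, act-after-unlock pattern, with all names invented:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t rb_lock = PTHREAD_MUTEX_INITIALIZER;
	static int stale_count;	/* stands in for the private "stale" list */

	static void buffer_get_like(void)
	{
		int to_recover;

		pthread_mutex_lock(&rb_lock);
		to_recover = stale_count;	/* splice under the lock */
		stale_count = 0;
		pthread_mutex_unlock(&rb_lock);

		if (to_recover)	/* post LOCAL_INV etc. only after unlocking */
			printf("recovering %d stale FRMRs\n", to_recover);
	}

	int main(void)
	{
		stale_count = 2;
		buffer_get_like();
		return 0;
	}
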
@@ -1299,34 +1548,14 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1299{ 1548{
1300 struct rpcrdma_buffer *buffers = req->rl_buffer; 1549 struct rpcrdma_buffer *buffers = req->rl_buffer;
1301 struct rpcrdma_ia *ia = rdmab_to_ia(buffers); 1550 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1302 int i;
1303 unsigned long flags; 1551 unsigned long flags;
1304 1552
1305 spin_lock_irqsave(&buffers->rb_lock, flags); 1553 spin_lock_irqsave(&buffers->rb_lock, flags);
1306 buffers->rb_send_bufs[--buffers->rb_send_index] = req; 1554 rpcrdma_buffer_put_sendbuf(req, buffers);
1307 req->rl_niovs = 0;
1308 if (req->rl_reply) {
1309 buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
1310 req->rl_reply->rr_func = NULL;
1311 req->rl_reply = NULL;
1312 }
1313 switch (ia->ri_memreg_strategy) { 1555 switch (ia->ri_memreg_strategy) {
1314 case RPCRDMA_FRMR: 1556 case RPCRDMA_FRMR:
1315 case RPCRDMA_MTHCAFMR: 1557 case RPCRDMA_MTHCAFMR:
1316 /* 1558 rpcrdma_buffer_put_mrs(req, buffers);
1317 * Cycle mw's back in reverse order, and "spin" them.
1318 * This delays and scrambles reuse as much as possible.
1319 */
1320 i = 1;
1321 do {
1322 struct rpcrdma_mw **mw;
1323 mw = &req->rl_segments[i].mr_chunk.rl_mw;
1324 list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
1325 *mw = NULL;
1326 } while (++i < RPCRDMA_MAX_SEGS);
1327 list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
1328 &buffers->rb_mws);
1329 req->rl_segments[0].mr_chunk.rl_mw = NULL;
1330 break; 1559 break;
1331 default: 1560 default:
1332 break; 1561 break;
@@ -1388,6 +1617,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1388 */ 1617 */
1389 iov->addr = ib_dma_map_single(ia->ri_id->device, 1618 iov->addr = ib_dma_map_single(ia->ri_id->device,
1390 va, len, DMA_BIDIRECTIONAL); 1619 va, len, DMA_BIDIRECTIONAL);
1620 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1621 return -ENOMEM;
1622
1391 iov->length = len; 1623 iov->length = len;
1392 1624
1393 if (ia->ri_have_dma_lkey) { 1625 if (ia->ri_have_dma_lkey) {
@@ -1483,8 +1715,10 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1483 struct rpcrdma_xprt *r_xprt) 1715 struct rpcrdma_xprt *r_xprt)
1484{ 1716{
1485 struct rpcrdma_mr_seg *seg1 = seg; 1717 struct rpcrdma_mr_seg *seg1 = seg;
1486 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr; 1718 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
1487 1719 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1720 struct ib_mr *mr = frmr->fr_mr;
1721 struct ib_send_wr fastreg_wr, *bad_wr;
1488 u8 key; 1722 u8 key;
1489 int len, pageoff; 1723 int len, pageoff;
1490 int i, rc; 1724 int i, rc;
@@ -1502,8 +1736,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1502 rpcrdma_map_one(ia, seg, writing); 1736 rpcrdma_map_one(ia, seg, writing);
1503 pa = seg->mr_dma; 1737 pa = seg->mr_dma;
1504 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { 1738 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
1505 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl-> 1739 frmr->fr_pgl->page_list[page_no++] = pa;
1506 page_list[page_no++] = pa;
1507 pa += PAGE_SIZE; 1740 pa += PAGE_SIZE;
1508 } 1741 }
1509 len += seg->mr_len; 1742 len += seg->mr_len;
@@ -1515,65 +1748,51 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1515 break; 1748 break;
1516 } 1749 }
1517 dprintk("RPC: %s: Using frmr %p to map %d segments\n", 1750 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1518 __func__, seg1->mr_chunk.rl_mw, i); 1751 __func__, mw, i);
1519 1752
1520 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) { 1753 frmr->fr_state = FRMR_IS_VALID;
1521 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n", 1754
1522 __func__, 1755 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1523 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey); 1756 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1524 /* Invalidate before using. */ 1757 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1525 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1758 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1526 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; 1759 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1527 invalidate_wr.next = &frmr_wr; 1760 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1528 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1761 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1529 invalidate_wr.send_flags = IB_SEND_SIGNALED; 1762 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1530 invalidate_wr.ex.invalidate_rkey = 1763 if (fastreg_wr.wr.fast_reg.length < len) {
1531 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1764 rc = -EIO;
1532 DECR_CQCOUNT(&r_xprt->rx_ep); 1765 goto out_err;
1533 post_wr = &invalidate_wr;
1534 } else
1535 post_wr = &frmr_wr;
1536
1537 /* Prepare FRMR WR */
1538 memset(&frmr_wr, 0, sizeof frmr_wr);
1539 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1540 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1541 frmr_wr.send_flags = IB_SEND_SIGNALED;
1542 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1543 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1544 frmr_wr.wr.fast_reg.page_list_len = page_no;
1545 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1546 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1547 if (frmr_wr.wr.fast_reg.length < len) {
1548 while (seg1->mr_nsegs--)
1549 rpcrdma_unmap_one(ia, seg++);
1550 return -EIO;
1551 } 1766 }
1552 1767
1553 /* Bump the key */ 1768 /* Bump the key */
1554 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); 1769 key = (u8)(mr->rkey & 0x000000FF);
1555 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); 1770 ib_update_fast_reg_key(mr, ++key);
1556 1771
1557 frmr_wr.wr.fast_reg.access_flags = (writing ? 1772 fastreg_wr.wr.fast_reg.access_flags = (writing ?
1558 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 1773 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1559 IB_ACCESS_REMOTE_READ); 1774 IB_ACCESS_REMOTE_READ);
1560 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1775 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
1561 DECR_CQCOUNT(&r_xprt->rx_ep); 1776 DECR_CQCOUNT(&r_xprt->rx_ep);
1562 1777
1563 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); 1778 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
1564
1565 if (rc) { 1779 if (rc) {
1566 dprintk("RPC: %s: failed ib_post_send for register," 1780 dprintk("RPC: %s: failed ib_post_send for register,"
1567 " status %i\n", __func__, rc); 1781 " status %i\n", __func__, rc);
1568 while (i--) 1782 ib_update_fast_reg_key(mr, --key);
1569 rpcrdma_unmap_one(ia, --seg); 1783 goto out_err;
1570 } else { 1784 } else {
1571 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1785 seg1->mr_rkey = mr->rkey;
1572 seg1->mr_base = seg1->mr_dma + pageoff; 1786 seg1->mr_base = seg1->mr_dma + pageoff;
1573 seg1->mr_nsegs = i; 1787 seg1->mr_nsegs = i;
1574 seg1->mr_len = len; 1788 seg1->mr_len = len;
1575 } 1789 }
1576 *nsegs = i; 1790 *nsegs = i;
1791 return 0;
1792out_err:
1793 frmr->fr_state = FRMR_IS_INVALID;
1794 while (i--)
1795 rpcrdma_unmap_one(ia, --seg);
1577 return rc; 1796 return rc;
1578} 1797}
1579 1798
@@ -1585,20 +1804,25 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1585 struct ib_send_wr invalidate_wr, *bad_wr; 1804 struct ib_send_wr invalidate_wr, *bad_wr;
1586 int rc; 1805 int rc;
1587 1806
1588 while (seg1->mr_nsegs--) 1807 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
1589 rpcrdma_unmap_one(ia, seg++);
1590 1808
1591 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1809 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1592 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; 1810 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1593 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1811 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1594 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1595 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1812 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1596 DECR_CQCOUNT(&r_xprt->rx_ep); 1813 DECR_CQCOUNT(&r_xprt->rx_ep);
1597 1814
1815 read_lock(&ia->ri_qplock);
1816 while (seg1->mr_nsegs--)
1817 rpcrdma_unmap_one(ia, seg++);
1598 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 1818 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1599 if (rc) 1819 read_unlock(&ia->ri_qplock);
1820 if (rc) {
1821 /* Force rpcrdma_buffer_get() to retry */
1822 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
1600 dprintk("RPC: %s: failed ib_post_send for invalidate," 1823 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1601 " status %i\n", __func__, rc); 1824 " status %i\n", __func__, rc);
1825 }
1602 return rc; 1826 return rc;
1603} 1827}
1604 1828
@@ -1656,8 +1880,10 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1656 1880
1657 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); 1881 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1658 rc = ib_unmap_fmr(&l); 1882 rc = ib_unmap_fmr(&l);
1883 read_lock(&ia->ri_qplock);
1659 while (seg1->mr_nsegs--) 1884 while (seg1->mr_nsegs--)
1660 rpcrdma_unmap_one(ia, seg++); 1885 rpcrdma_unmap_one(ia, seg++);
1886 read_unlock(&ia->ri_qplock);
1661 if (rc) 1887 if (rc)
1662 dprintk("RPC: %s: failed ib_unmap_fmr," 1888 dprintk("RPC: %s: failed ib_unmap_fmr,"
1663 " status %i\n", __func__, rc); 1889 " status %i\n", __func__, rc);
@@ -1673,7 +1899,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1673 1899
1674 switch (ia->ri_memreg_strategy) { 1900 switch (ia->ri_memreg_strategy) {
1675 1901
1676#if RPCRDMA_PERSISTENT_REGISTRATION
1677 case RPCRDMA_ALLPHYSICAL: 1902 case RPCRDMA_ALLPHYSICAL:
1678 rpcrdma_map_one(ia, seg, writing); 1903 rpcrdma_map_one(ia, seg, writing);
1679 seg->mr_rkey = ia->ri_bind_mem->rkey; 1904 seg->mr_rkey = ia->ri_bind_mem->rkey;
@@ -1681,7 +1906,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1681 seg->mr_nsegs = 1; 1906 seg->mr_nsegs = 1;
1682 nsegs = 1; 1907 nsegs = 1;
1683 break; 1908 break;
1684#endif
1685 1909
1686 /* Registration using frmr registration */ 1910 /* Registration using frmr registration */
1687 case RPCRDMA_FRMR: 1911 case RPCRDMA_FRMR:
@@ -1711,11 +1935,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1711 1935
1712 switch (ia->ri_memreg_strategy) { 1936 switch (ia->ri_memreg_strategy) {
1713 1937
1714#if RPCRDMA_PERSISTENT_REGISTRATION
1715 case RPCRDMA_ALLPHYSICAL: 1938 case RPCRDMA_ALLPHYSICAL:
1939 read_lock(&ia->ri_qplock);
1716 rpcrdma_unmap_one(ia, seg); 1940 rpcrdma_unmap_one(ia, seg);
1941 read_unlock(&ia->ri_qplock);
1717 break; 1942 break;
1718#endif
1719 1943
1720 case RPCRDMA_FRMR: 1944 case RPCRDMA_FRMR:
1721 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); 1945 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
@@ -1809,3 +2033,44 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1809 rc); 2033 rc);
1810 return rc; 2034 return rc;
1811} 2035}
2036
2037/* Physical mapping means one Read/Write list entry per page.
2038 * All list entries must fit within an inline buffer
2039 *
2040 * NB: The server must return a Write list for NFS READ,
2041 * which has the same constraint. Factor in the inline
2042 * rsize as well.
2043 */
2044static size_t
2045rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2046{
2047 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2048 unsigned int inline_size, pages;
2049
2050 inline_size = min_t(unsigned int,
2051 cdata->inline_wsize, cdata->inline_rsize);
2052 inline_size -= RPCRDMA_HDRLEN_MIN;
2053 pages = inline_size / sizeof(struct rpcrdma_segment);
2054 return pages << PAGE_SHIFT;
2055}
2056
2057static size_t
2058rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2059{
2060 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2061}
2062
2063size_t
2064rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2065{
2066 size_t result;
2067
2068 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2069 case RPCRDMA_ALLPHYSICAL:
2070 result = rpcrdma_physical_max_payload(r_xprt);
2071 break;
2072 default:
2073 result = rpcrdma_mr_max_payload(r_xprt);
2074 }
2075 return result;
2076}
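A worked instance of rpcrdma_physical_max_payload() may help: with inline_rsize = inline_wsize = 1024, a 28-byte minimal RPC/RDMA header, 16-byte chunk segments, and 4KB pages (the last three values are assumptions about the related headers, stated here only for the arithmetic), the result is 62 segments, or 253952 bytes:

	#include <stdio.h>

	int main(void)
	{
		unsigned int inline_rsize = 1024, inline_wsize = 1024;
		unsigned int hdrlen_min = 28;	/* assumed RPCRDMA_HDRLEN_MIN */
		unsigned int seg_size = 16;	/* assumed sizeof(struct rpcrdma_segment) */
		unsigned int page_shift = 12;	/* assumed 4KB pages */
		unsigned int inline_size, pages;

		inline_size = (inline_wsize < inline_rsize ?
			       inline_wsize : inline_rsize) - hdrlen_min;
		pages = inline_size / seg_size;			/* 62 */
		printf("max payload: %u bytes\n", pages << page_shift);
		return 0;
	}
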
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 89e7cd479705..c419498b8f46 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -59,6 +59,7 @@
59 * Interface Adapter -- one per transport instance 59 * Interface Adapter -- one per transport instance
60 */ 60 */
61struct rpcrdma_ia { 61struct rpcrdma_ia {
62 rwlock_t ri_qplock;
62 struct rdma_cm_id *ri_id; 63 struct rdma_cm_id *ri_id;
63 struct ib_pd *ri_pd; 64 struct ib_pd *ri_pd;
64 struct ib_mr *ri_bind_mem; 65 struct ib_mr *ri_bind_mem;
@@ -98,6 +99,14 @@ struct rpcrdma_ep {
98#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 99#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
99#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 100#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
100 101
102enum rpcrdma_chunktype {
103 rpcrdma_noch = 0,
104 rpcrdma_readch,
105 rpcrdma_areadch,
106 rpcrdma_writech,
107 rpcrdma_replych
108};
109
101/* 110/*
102 * struct rpcrdma_rep -- this structure encapsulates state required to recv 111 * struct rpcrdma_rep -- this structure encapsulates state required to recv
103 * and complete a reply, asynchronously. It needs several pieces of 112
@@ -137,6 +146,40 @@ struct rpcrdma_rep {
137}; 146};
138 147
139/* 148/*
149 * struct rpcrdma_mw - external memory region metadata
150 *
151 * An external memory region is any buffer or page that is registered
152 * on the fly (ie, not pre-registered).
153 *
154 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
155 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
156 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
157 * track of registration metadata while each RPC is pending.
158 * rpcrdma_deregister_external() uses this metadata to unmap and
159 * release these resources when an RPC is complete.
160 */
161enum rpcrdma_frmr_state {
162 FRMR_IS_INVALID, /* ready to be used */
163 FRMR_IS_VALID, /* in use */
164 FRMR_IS_STALE, /* failed completion */
165};
166
167struct rpcrdma_frmr {
168 struct ib_fast_reg_page_list *fr_pgl;
169 struct ib_mr *fr_mr;
170 enum rpcrdma_frmr_state fr_state;
171};
172
173struct rpcrdma_mw {
174 union {
175 struct ib_fmr *fmr;
176 struct rpcrdma_frmr frmr;
177 } r;
178 struct list_head mw_list;
179 struct list_head mw_all;
180};
181
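The two list_heads encode an invariant worth spelling out: an rpcrdma_mw stays on rb_all for its entire lifetime, so teardown and the connect-worker reset can always find it, while mw_list threads it onto rb_mws only while it is free. A kernel-style sketch of hypothetical get/put helpers expressing that invariant (illustrative, not part of the patch):

	#include <linux/list.h>

	static struct rpcrdma_mw *mw_get(struct rpcrdma_buffer *buf)
	{
		struct rpcrdma_mw *r = NULL;

		if (!list_empty(&buf->rb_mws)) {
			r = list_entry(buf->rb_mws.next,
				       struct rpcrdma_mw, mw_list);
			list_del(&r->mw_list);	/* off the free list only */
		}
		return r;			/* still on rb_all */
	}

	static void mw_put(struct rpcrdma_buffer *buf, struct rpcrdma_mw *r)
	{
		list_add_tail(&r->mw_list, &buf->rb_mws);
	}
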
182/*
140 * struct rpcrdma_req -- structure central to the request/reply sequence. 183 * struct rpcrdma_req -- structure central to the request/reply sequence.
141 * 184 *
142 * N of these are associated with a transport instance, and stored in 185 * N of these are associated with a transport instance, and stored in
@@ -163,17 +206,7 @@ struct rpcrdma_rep {
163struct rpcrdma_mr_seg { /* chunk descriptors */ 206struct rpcrdma_mr_seg { /* chunk descriptors */
164 union { /* chunk memory handles */ 207 union { /* chunk memory handles */
165 struct ib_mr *rl_mr; /* if registered directly */ 208 struct ib_mr *rl_mr; /* if registered directly */
166 struct rpcrdma_mw { /* if registered from region */ 209 struct rpcrdma_mw *rl_mw; /* if registered from region */
167 union {
168 struct ib_fmr *fmr;
169 struct {
170 struct ib_fast_reg_page_list *fr_pgl;
171 struct ib_mr *fr_mr;
172 enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
173 } frmr;
174 } r;
175 struct list_head mw_list;
176 } *rl_mw;
177 } mr_chunk; 210 } mr_chunk;
178 u64 mr_base; /* registration result */ 211 u64 mr_base; /* registration result */
179 u32 mr_rkey; /* registration result */ 212 u32 mr_rkey; /* registration result */
@@ -191,6 +224,7 @@ struct rpcrdma_req {
191 unsigned int rl_niovs; /* 0, 2 or 4 */ 224 unsigned int rl_niovs; /* 0, 2 or 4 */
192 unsigned int rl_nchunks; /* non-zero if chunks */ 225 unsigned int rl_nchunks; /* non-zero if chunks */
193 unsigned int rl_connect_cookie; /* retry detection */ 226 unsigned int rl_connect_cookie; /* retry detection */
227 enum rpcrdma_chunktype rl_rtype, rl_wtype;
194 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ 228 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
195 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ 229 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
196 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ 230 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@@ -214,6 +248,7 @@ struct rpcrdma_buffer {
214 atomic_t rb_credits; /* most recent server credits */ 248 atomic_t rb_credits; /* most recent server credits */
215 int rb_max_requests;/* client max requests */ 249 int rb_max_requests;/* client max requests */
216 struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ 250 struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
251 struct list_head rb_all;
217 int rb_send_index; 252 int rb_send_index;
218 struct rpcrdma_req **rb_send_bufs; 253 struct rpcrdma_req **rb_send_bufs;
219 int rb_recv_index; 254 int rb_recv_index;
@@ -306,7 +341,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
306 struct rpcrdma_create_data_internal *); 341 struct rpcrdma_create_data_internal *);
307void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); 342void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
308int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); 343int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
309int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); 344void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
310 345
311int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 346int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
312 struct rpcrdma_req *); 347 struct rpcrdma_req *);
@@ -346,7 +381,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
346/* 381/*
347 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c 382 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
348 */ 383 */
384ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
349int rpcrdma_marshal_req(struct rpc_rqst *); 385int rpcrdma_marshal_req(struct rpc_rqst *);
386size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
350 387
351/* Temporary NFS request map cache. Created in svc_rdma.c */ 388/* Temporary NFS request map cache. Created in svc_rdma.c */
352extern struct kmem_cache *svc_rdma_map_cachep; 389extern struct kmem_cache *svc_rdma_map_cachep;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be8bbd5d65ec..43cd89eacfab 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -594,6 +594,7 @@ static int xs_local_send_request(struct rpc_task *task)
594 } 594 }
595 595
596 switch (status) { 596 switch (status) {
597 case -ENOBUFS:
597 case -EAGAIN: 598 case -EAGAIN:
598 status = xs_nospace(task); 599 status = xs_nospace(task);
599 break; 600 break;
@@ -661,6 +662,7 @@ static int xs_udp_send_request(struct rpc_task *task)
661 dprintk("RPC: sendmsg returned unrecognized error %d\n", 662 dprintk("RPC: sendmsg returned unrecognized error %d\n",
662 -status); 663 -status);
663 case -ENETUNREACH: 664 case -ENETUNREACH:
665 case -ENOBUFS:
664 case -EPIPE: 666 case -EPIPE:
665 case -ECONNREFUSED: 667 case -ECONNREFUSED:
666 /* When the server has died, an ICMP port unreachable message 668 /* When the server has died, an ICMP port unreachable message
@@ -758,6 +760,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
758 status = -ENOTCONN; 760 status = -ENOTCONN;
759 /* Should we call xs_close() here? */ 761 /* Should we call xs_close() here? */
760 break; 762 break;
763 case -ENOBUFS:
761 case -EAGAIN: 764 case -EAGAIN:
762 status = xs_nospace(task); 765 status = xs_nospace(task);
763 break; 766 break;
@@ -1946,6 +1949,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
1946 dprintk("RPC: xprt %p connected to %s\n", 1949 dprintk("RPC: xprt %p connected to %s\n",
1947 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1950 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1948 xprt_set_connected(xprt); 1951 xprt_set_connected(xprt);
1952 case -ENOBUFS:
1949 break; 1953 break;
1950 case -ENOENT: 1954 case -ENOENT:
1951 dprintk("RPC: xprt %p: socket %s does not exist\n", 1955 dprintk("RPC: xprt %p: socket %s does not exist\n",
@@ -2281,6 +2285,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2281 case -ECONNREFUSED: 2285 case -ECONNREFUSED:
2282 case -ECONNRESET: 2286 case -ECONNRESET:
2283 case -ENETUNREACH: 2287 case -ENETUNREACH:
2288 case -ENOBUFS:
2284 /* retry with existing socket, after a delay */ 2289 /* retry with existing socket, after a delay */
2285 goto out; 2290 goto out;
2286 } 2291 }
@@ -3054,12 +3059,12 @@ static int param_set_uint_minmax(const char *val,
3054 const struct kernel_param *kp, 3059 const struct kernel_param *kp,
3055 unsigned int min, unsigned int max) 3060 unsigned int min, unsigned int max)
3056{ 3061{
3057 unsigned long num; 3062 unsigned int num;
3058 int ret; 3063 int ret;
3059 3064
3060 if (!val) 3065 if (!val)
3061 return -EINVAL; 3066 return -EINVAL;
3062 ret = strict_strtoul(val, 0, &num); 3067 ret = kstrtouint(val, 0, &num);
3063 if (ret == -EINVAL || num < min || num > max) 3068 if (ret == -EINVAL || num < min || num > max)
3064 return -EINVAL; 3069 return -EINVAL;
3065 *((unsigned int *)kp->arg) = num; 3070 *((unsigned int *)kp->arg) = num;
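The last hunk swaps the long-deprecated strict_strtoul() for kstrtouint(), which parses straight into an unsigned int and rejects overflow, so the temporary no longer needs to be unsigned long (the kernel helper can also return -ERANGE, which the sketch below folds into the same failure path). A userspace sketch of the same min/max validation, with strtoul() standing in for kstrtouint() and the sample value invented:

	#include <errno.h>
	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>

	static int set_uint_minmax(const char *val, unsigned int *out,
				   unsigned int min, unsigned int max)
	{
		char *end;
		unsigned long num;

		if (!val)
			return -EINVAL;
		errno = 0;
		num = strtoul(val, &end, 0);	/* plays kstrtouint() */
		if (errno || *end || num > UINT_MAX ||
		    num < min || num > max)
			return -EINVAL;
		*out = (unsigned int)num;
		return 0;
	}

	int main(void)
	{
		unsigned int port;

		if (!set_uint_minmax("665", &port, 0, 65535))
			printf("port = %u\n", port);
		return 0;
	}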