author	Linus Torvalds <torvalds@linux-foundation.org>	2017-03-01 19:10:30 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-03-01 19:10:30 -0500
commit	8f03cf50bc9443e92d6e54ac4d599357d6cb7cbb (patch)
tree	0c38aab3a4c24d06fb05376b651157627bc1669d /net
parent	25c4e6c3f0c14d1575aa488ff4ca47e045ae51a0 (diff)
parent	ed92d8c137b7794c2c2aa14479298b9885967607 (diff)
Merge tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Stable bugfixes:
   - NFSv4: Fix memory and state leak in _nfs4_open_and_get_state
   - xprtrdma: Fix Read chunk padding
   - xprtrdma: Per-connection pad optimization
   - xprtrdma: Disable pad optimization by default
   - xprtrdma: Reduce required number of send SGEs
   - nlm: Ensure callback code also checks that the files match
   - pNFS/flexfiles: If the layout is invalid, it must be updated before
     retrying
   - NFSv4: Fix reboot recovery in copy offload
   - Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION
     replies to OP_SEQUENCE"
   - NFSv4: fix getacl head length estimation
   - NFSv4: fix getacl ERANGE for some ACL buffer sizes

  Features:
   - Add and use dprintk_cont macros
   - Various cleanups to NFS v4.x to reduce code duplication and
     complexity
   - Remove unused cr_magic related code
   - Improvements to sunrpc "read from buffer" code
   - Clean up sunrpc timeout code and allow changing TCP timeout
     parameters
   - Remove duplicate mw_list management code in xprtrdma
   - Add generic functions for encoding and decoding xdr streams

  Bugfixes:
   - Clean up nfs_show_mountd_netid
   - Make layoutreturn_ops static and use NULL instead of 0 to fix
     sparse warnings
   - Properly handle -ERESTARTSYS in nfs_rename()
   - Check if register_shrinker() failed during rpcauth_init()
   - Properly clean up procfs/pipefs entries
   - Various NFS over RDMA related fixes
   - Silence uninitialized variable warning in sunrpc"

* tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (64 commits)
  NFSv4: fix getacl ERANGE for some ACL buffer sizes
  NFSv4: fix getacl head length estimation
  Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE"
  NFSv4: Fix reboot recovery in copy offload
  pNFS/flexfiles: If the layout is invalid, it must be updated before retrying
  NFSv4: Clean up owner/group attribute decode
  SUNRPC: Add a helper function xdr_stream_decode_string_dup()
  NFSv4: Remove bogus "struct nfs_client" argument from decode_ace()
  NFSv4: Fix the underestimation of delegation XDR space reservation
  NFSv4: Replace callback string decode function with a generic
  NFSv4: Replace the open coded decode_opaque_inline() with the new generic
  NFSv4: Replace ad-hoc xdr encode/decode helpers with xdr_stream_* generics
  SUNRPC: Add generic helpers for xdr_stream encode/decode
  sunrpc: silence uninitialized variable warning
  nlm: Ensure callback code also checks that the files match
  sunrpc: Allow xprt->ops->timer method to sleep
  xprtrdma: Refactor management of mw_list field
  xprtrdma: Handle stale connection rejection
  xprtrdma: Properly recover FRWRs with in-flight FASTREG WRs
  xprtrdma: Shrink send SGEs array
  ...
Diffstat (limited to 'net')
-rw-r--r--	net/sunrpc/auth.c	15
-rw-r--r--	net/sunrpc/auth_null.c	3
-rw-r--r--	net/sunrpc/auth_unix.c	18
-rw-r--r--	net/sunrpc/cache.c	68
-rw-r--r--	net/sunrpc/clnt.c	51
-rw-r--r--	net/sunrpc/debugfs.c	35
-rw-r--r--	net/sunrpc/svcauth_unix.c	4
-rw-r--r--	net/sunrpc/xdr.c	34
-rw-r--r--	net/sunrpc/xprt.c	2
-rw-r--r--	net/sunrpc/xprtrdma/fmr_ops.c	5
-rw-r--r--	net/sunrpc/xprtrdma/frwr_ops.c	11
-rw-r--r--	net/sunrpc/xprtrdma/rpc_rdma.c	82
-rw-r--r--	net/sunrpc/xprtrdma/transport.c	6
-rw-r--r--	net/sunrpc/xprtrdma/verbs.c	96
-rw-r--r--	net/sunrpc/xprtrdma/xprt_rdma.h	30
-rw-r--r--	net/sunrpc/xprtsock.c	94
16 files changed, 304 insertions(+), 250 deletions(-)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2bff63a73cf8..a1ee933e3029 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -464,8 +464,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 	 * Note that the cred_unused list must be time-ordered.
 	 */
 	if (time_in_range(cred->cr_expire, expired, jiffies) &&
-	    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+	    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
+		freed = SHRINK_STOP;
 		break;
+	}
 
 	list_del_init(&cred->cr_lru);
 	number_cred_unused--;
@@ -520,7 +522,7 @@ static unsigned long
 rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 
 {
-	return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+	return number_cred_unused * sysctl_vfs_cache_pressure / 100;
 }
 
 static void
@@ -646,9 +648,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 	cred->cr_auth = auth;
 	cred->cr_ops = ops;
 	cred->cr_expire = jiffies;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	cred->cr_magic = RPCAUTH_CRED_MAGIC;
-#endif
 	cred->cr_uid = acred->uid;
 }
 EXPORT_SYMBOL_GPL(rpcauth_init_cred);
@@ -876,8 +875,12 @@ int __init rpcauth_init_module(void)
 	err = rpc_init_generic_auth();
 	if (err < 0)
 		goto out2;
-	register_shrinker(&rpc_cred_shrinker);
+	err = register_shrinker(&rpc_cred_shrinker);
+	if (err < 0)
+		goto out3;
 	return 0;
+out3:
+	rpc_destroy_generic_auth();
 out2:
 	rpc_destroy_authunix();
 out1:
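
A side note on the rpcauth_cache_shrink_count() hunk above: the old expression divided before multiplying, so any unused-credential count below 100 was reported to the shrinker as zero. A stand-alone sketch of the arithmetic (the values are illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned long number_cred_unused = 30;	/* fewer than 100 creds */
		unsigned long vfs_cache_pressure = 100;

		/* old: divide first -- truncates to 0 for counts under 100 */
		printf("%lu\n", (number_cred_unused / 100) * vfs_cache_pressure);

		/* new: multiply first -- reports 30 shrinkable objects */
		printf("%lu\n", number_cred_unused * vfs_cache_pressure / 100);
		return 0;
	}
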
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 4d17376b2acb..5f3d527dff65 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -139,7 +139,4 @@ struct rpc_cred null_cred = {
 	.cr_ops		= &null_credops,
 	.cr_count	= ATOMIC_INIT(1),
 	.cr_flags	= 1UL << RPCAUTH_CRED_UPTODATE,
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	.cr_magic	= RPCAUTH_CRED_MAGIC,
-#endif
 };
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 306fc0f54596..82337e1ec9cd 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -14,12 +14,10 @@
 #include <linux/sunrpc/auth.h>
 #include <linux/user_namespace.h>
 
-#define NFS_NGROUPS	16
-
 struct unx_cred {
 	struct rpc_cred		uc_base;
 	kgid_t			uc_gid;
-	kgid_t			uc_gids[NFS_NGROUPS];
+	kgid_t			uc_gids[UNX_NGROUPS];
 };
 #define uc_uid uc_base.cr_uid
 
@@ -82,13 +80,13 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
 
 	if (acred->group_info != NULL)
 		groups = acred->group_info->ngroups;
-	if (groups > NFS_NGROUPS)
-		groups = NFS_NGROUPS;
+	if (groups > UNX_NGROUPS)
+		groups = UNX_NGROUPS;
 
 	cred->uc_gid = acred->gid;
 	for (i = 0; i < groups; i++)
 		cred->uc_gids[i] = acred->group_info->gid[i];
-	if (i < NFS_NGROUPS)
+	if (i < UNX_NGROUPS)
 		cred->uc_gids[i] = INVALID_GID;
 
 	return &cred->uc_base;
@@ -132,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
 
 	if (acred->group_info != NULL)
 		groups = acred->group_info->ngroups;
-	if (groups > NFS_NGROUPS)
-		groups = NFS_NGROUPS;
+	if (groups > UNX_NGROUPS)
+		groups = UNX_NGROUPS;
 	for (i = 0; i < groups ; i++)
 		if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
 			return 0;
-	if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
+	if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
 		return 0;
 	return 1;
 }
@@ -166,7 +164,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
 	*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
 	*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
 	hold = p++;
-	for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
+	for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
 		*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
 	*hold = htonl(p - hold - 1);	/* gid array length */
 	*base = htonl((p - base - 1) << 2);	/* cred length */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index d8639da06d9c..79d55d949d9a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -728,7 +728,7 @@ void cache_clean_deferred(void *owner)
 /*
  * communicate with user-space
  *
- * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
+ * We have a magic /proc file - /proc/net/rpc/<cachename>/channel.
  * On read, you get a full request, or block.
  * On write, an update request is processed.
  * Poll works if anything to read, and always allows write.
@@ -1283,7 +1283,7 @@ EXPORT_SYMBOL_GPL(qword_get);
 
 
 /*
- * support /proc/sunrpc/cache/$CACHENAME/content
+ * support /proc/net/rpc/$CACHENAME/content
  * as a seqfile.
  * We call ->cache_show passing NULL for the item to
  * get a header, then pass each real item in the cache
@@ -1438,20 +1438,11 @@ static ssize_t read_flush(struct file *file, char __user *buf,
 			  struct cache_detail *cd)
 {
 	char tbuf[22];
-	unsigned long p = *ppos;
 	size_t len;
 
-	snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time));
-	len = strlen(tbuf);
-	if (p >= len)
-		return 0;
-	len -= p;
-	if (len > count)
-		len = count;
-	if (copy_to_user(buf, (void*)(tbuf+p), len))
-		return -EFAULT;
-	*ppos += len;
-	return len;
+	len = snprintf(tbuf, sizeof(tbuf), "%lu\n",
+		       convert_to_wallclock(cd->flush_time));
+	return simple_read_from_buffer(buf, count, ppos, tbuf, len);
 }
 
 static ssize_t write_flush(struct file *file, const char __user *buf,
@@ -1611,21 +1602,12 @@ static const struct file_operations cache_flush_operations_procfs = {
 	.llseek		= no_llseek,
 };
 
-static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
+static void remove_cache_proc_entries(struct cache_detail *cd)
 {
-	struct sunrpc_net *sn;
-
-	if (cd->u.procfs.proc_ent == NULL)
-		return;
-	if (cd->u.procfs.flush_ent)
-		remove_proc_entry("flush", cd->u.procfs.proc_ent);
-	if (cd->u.procfs.channel_ent)
-		remove_proc_entry("channel", cd->u.procfs.proc_ent);
-	if (cd->u.procfs.content_ent)
-		remove_proc_entry("content", cd->u.procfs.proc_ent);
-	cd->u.procfs.proc_ent = NULL;
-	sn = net_generic(net, sunrpc_net_id);
-	remove_proc_entry(cd->name, sn->proc_net_rpc);
+	if (cd->procfs) {
+		proc_remove(cd->procfs);
+		cd->procfs = NULL;
+	}
 }
 
 #ifdef CONFIG_PROC_FS
@@ -1635,38 +1617,30 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 	struct sunrpc_net *sn;
 
 	sn = net_generic(net, sunrpc_net_id);
-	cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
-	if (cd->u.procfs.proc_ent == NULL)
+	cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
+	if (cd->procfs == NULL)
 		goto out_nomem;
-	cd->u.procfs.channel_ent = NULL;
-	cd->u.procfs.content_ent = NULL;
 
 	p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
-			     cd->u.procfs.proc_ent,
-			     &cache_flush_operations_procfs, cd);
-	cd->u.procfs.flush_ent = p;
+			     cd->procfs, &cache_flush_operations_procfs, cd);
 	if (p == NULL)
 		goto out_nomem;
 
 	if (cd->cache_request || cd->cache_parse) {
 		p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
-				     cd->u.procfs.proc_ent,
-				     &cache_file_operations_procfs, cd);
-		cd->u.procfs.channel_ent = p;
+				     cd->procfs, &cache_file_operations_procfs, cd);
 		if (p == NULL)
 			goto out_nomem;
 	}
 	if (cd->cache_show) {
 		p = proc_create_data("content", S_IFREG|S_IRUSR,
-				     cd->u.procfs.proc_ent,
-				     &content_file_operations_procfs, cd);
-		cd->u.procfs.content_ent = p;
+				     cd->procfs, &content_file_operations_procfs, cd);
 		if (p == NULL)
 			goto out_nomem;
 	}
 	return 0;
 out_nomem:
-	remove_cache_proc_entries(cd, net);
+	remove_cache_proc_entries(cd);
 	return -ENOMEM;
 }
 #else /* CONFIG_PROC_FS */
@@ -1695,7 +1669,7 @@ EXPORT_SYMBOL_GPL(cache_register_net);
 
 void cache_unregister_net(struct cache_detail *cd, struct net *net)
 {
-	remove_cache_proc_entries(cd, net);
+	remove_cache_proc_entries(cd);
 	sunrpc_destroy_cache_detail(cd);
 }
 EXPORT_SYMBOL_GPL(cache_unregister_net);
@@ -1854,15 +1828,17 @@ int sunrpc_cache_register_pipefs(struct dentry *parent,
 	struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd);
 	if (IS_ERR(dir))
 		return PTR_ERR(dir);
-	cd->u.pipefs.dir = dir;
+	cd->pipefs = dir;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
 
 void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
 {
-	rpc_remove_cache_dir(cd->u.pipefs.dir);
-	cd->u.pipefs.dir = NULL;
+	if (cd->pipefs) {
+		rpc_remove_cache_dir(cd->pipefs);
+		cd->pipefs = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
 
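For context on the cache.c cleanup above: proc_remove() tears down an entry together with everything created beneath it, which is why the per-file flush_ent/channel_ent/content_ent bookkeeping can be replaced by a single cd->procfs handle. A minimal, hypothetical module sketch of that pattern (the directory name is illustrative):

	#include <linux/module.h>
	#include <linux/proc_fs.h>

	static struct proc_dir_entry *dir;

	static int __init example_init(void)
	{
		dir = proc_mkdir("example-cache", NULL);	/* one handle for the subtree */
		if (!dir)
			return -ENOMEM;
		return 0;
	}

	static void __exit example_exit(void)
	{
		proc_remove(dir);	/* also removes any files created under dir */
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");
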
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1dc9f3bac099..52da3ce54bb5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1453,21 +1453,6 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
 EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
 
 /**
- * rpc_get_timeout - Get timeout for transport in units of HZ
- * @clnt: RPC client to query
- */
-unsigned long rpc_get_timeout(struct rpc_clnt *clnt)
-{
-	unsigned long ret;
-
-	rcu_read_lock();
-	ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval;
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(rpc_get_timeout);
-
-/**
  * rpc_force_rebind - force transport to check that remote port is unchanged
  * @clnt: client to rebind
  *
@@ -2699,6 +2684,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 {
 	struct rpc_xprt_switch *xps;
 	struct rpc_xprt *xprt;
+	unsigned long connect_timeout;
 	unsigned long reconnect_timeout;
 	unsigned char resvport;
 	int ret = 0;
@@ -2711,6 +2697,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 		return -EAGAIN;
 	}
 	resvport = xprt->resvport;
+	connect_timeout = xprt->connect_timeout;
 	reconnect_timeout = xprt->max_reconnect_timeout;
 	rcu_read_unlock();
 
@@ -2720,7 +2707,10 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 		goto out_put_switch;
 	}
 	xprt->resvport = resvport;
-	xprt->max_reconnect_timeout = reconnect_timeout;
+	if (xprt->ops->set_connect_timeout != NULL)
+		xprt->ops->set_connect_timeout(xprt,
+				connect_timeout,
+				reconnect_timeout);
 
 	rpc_xprt_switch_set_roundrobin(xps);
 	if (setup) {
@@ -2737,26 +2727,39 @@ out_put_switch:
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
 
+struct connect_timeout_data {
+	unsigned long connect_timeout;
+	unsigned long reconnect_timeout;
+};
+
 static int
-rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt,
 		struct rpc_xprt *xprt,
 		void *data)
 {
-	unsigned long timeout = *((unsigned long *)data);
+	struct connect_timeout_data *timeo = data;
 
-	if (timeout < xprt->max_reconnect_timeout)
-		xprt->max_reconnect_timeout = timeout;
+	if (xprt->ops->set_connect_timeout)
+		xprt->ops->set_connect_timeout(xprt,
+				timeo->connect_timeout,
+				timeo->reconnect_timeout);
 	return 0;
 }
 
 void
-rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+rpc_set_connect_timeout(struct rpc_clnt *clnt,
+		unsigned long connect_timeout,
+		unsigned long reconnect_timeout)
 {
+	struct connect_timeout_data timeout = {
+		.connect_timeout = connect_timeout,
+		.reconnect_timeout = reconnect_timeout,
+	};
 	rpc_clnt_iterate_for_each_xprt(clnt,
-			rpc_xprt_cap_max_reconnect_timeout,
-			&timeo);
+			rpc_xprt_set_connect_timeout,
+			&timeout);
 }
-EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
 
 void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
 {
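
The renamed helper fans out through rpc_clnt_iterate_for_each_xprt(), so every transport in the client's xprt switch receives the new budgets via its set_connect_timeout method (implemented for TCP by xs_tcp_set_connect_timeout further down). A hedged caller sketch; both arguments are in jiffies and the values are purely illustrative:

	/* Allow up to 90 seconds to establish a connection, and cap the
	 * reconnect backoff at 15 seconds, on every transport of clnt.
	 */
	rpc_set_connect_timeout(clnt, 90 * HZ, 15 * HZ);
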
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e7b4d93566df..c8fd0b6c1618 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -16,11 +16,6 @@ static struct dentry *rpc_xprt_dir;
 
 unsigned int rpc_inject_disconnect;
 
-struct rpc_clnt_iter {
-	struct rpc_clnt *clnt;
-	loff_t pos;
-};
-
 static int
 tasks_show(struct seq_file *f, void *v)
 {
@@ -47,12 +42,10 @@ static void *
 tasks_start(struct seq_file *f, loff_t *ppos)
 	__acquires(&clnt->cl_lock)
 {
-	struct rpc_clnt_iter *iter = f->private;
+	struct rpc_clnt *clnt = f->private;
 	loff_t pos = *ppos;
-	struct rpc_clnt *clnt = iter->clnt;
 	struct rpc_task *task;
 
-	iter->pos = pos + 1;
 	spin_lock(&clnt->cl_lock);
 	list_for_each_entry(task, &clnt->cl_tasks, tk_task)
 		if (pos-- == 0)
@@ -63,12 +56,10 @@ tasks_start(struct seq_file *f, loff_t *ppos)
 static void *
 tasks_next(struct seq_file *f, void *v, loff_t *pos)
 {
-	struct rpc_clnt_iter *iter = f->private;
-	struct rpc_clnt *clnt = iter->clnt;
+	struct rpc_clnt *clnt = f->private;
 	struct rpc_task *task = v;
 	struct list_head *next = task->tk_task.next;
 
-	++iter->pos;
 	++*pos;
 
 	/* If there's another task on list, return it */
@@ -81,9 +72,7 @@ static void
 tasks_stop(struct seq_file *f, void *v)
 	__releases(&clnt->cl_lock)
 {
-	struct rpc_clnt_iter *iter = f->private;
-	struct rpc_clnt *clnt = iter->clnt;
-
+	struct rpc_clnt *clnt = f->private;
 	spin_unlock(&clnt->cl_lock);
 }
 
@@ -96,17 +85,13 @@ static const struct seq_operations tasks_seq_operations = {
 
 static int tasks_open(struct inode *inode, struct file *filp)
 {
-	int ret = seq_open_private(filp, &tasks_seq_operations,
-			sizeof(struct rpc_clnt_iter));
-
+	int ret = seq_open(filp, &tasks_seq_operations);
 	if (!ret) {
 		struct seq_file *seq = filp->private_data;
-		struct rpc_clnt_iter *iter = seq->private;
-
-		iter->clnt = inode->i_private;
+		struct rpc_clnt *clnt = seq->private = inode->i_private;
 
-		if (!atomic_inc_not_zero(&iter->clnt->cl_count)) {
-			seq_release_private(inode, filp);
+		if (!atomic_inc_not_zero(&clnt->cl_count)) {
+			seq_release(inode, filp);
 			ret = -EINVAL;
 		}
 	}
@@ -118,10 +103,10 @@ static int
 tasks_release(struct inode *inode, struct file *filp)
 {
 	struct seq_file *seq = filp->private_data;
-	struct rpc_clnt_iter *iter = seq->private;
+	struct rpc_clnt *clnt = seq->private;
 
-	rpc_release_client(iter->clnt);
-	return seq_release_private(inode, filp);
+	rpc_release_client(clnt);
+	return seq_release(inode, filp);
 }
 
 static const struct file_operations tasks_fops = {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 64af4f034de6..f81eaa8e0888 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -403,7 +403,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt)
 /****************************************************************************
  * auth.unix.gid cache
  * simple cache to map a UID to a list of GIDs
- * because AUTH_UNIX aka AUTH_SYS has a max of 16
+ * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS
  */
 #define	GID_HASHBITS	8
 #define	GID_HASHMAX	(1<<GID_HASHBITS)
@@ -810,7 +810,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
 	cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
 	slen = svc_getnl(argv);			/* gids length */
-	if (slen > 16 || (len -= (slen + 2)*4) < 0)
+	if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
 		goto badcred;
 	cred->cr_group_info = groups_alloc(slen);
 	if (cred->cr_group_info == NULL)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 7f1071e103ca..1f7082144e01 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1518,3 +1518,37 @@ out:
 }
 EXPORT_SYMBOL_GPL(xdr_process_buf);
 
+/**
+ * xdr_stream_decode_string_dup - Decode and duplicate variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store pointer to string
+ * @maxlen: maximum acceptable string length
+ * @gfp_flags: GFP mask to use
+ *
+ * Return values:
+ *   On success, returns length of NUL-terminated string stored in *@str
+ *   %-EBADMSG on XDR buffer overflow
+ *   %-EMSGSIZE if the size of the string would exceed @maxlen
+ *   %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
+		size_t maxlen, gfp_t gfp_flags)
+{
+	void *p;
+	ssize_t ret;
+
+	ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
+	if (ret > 0) {
+		char *s = kmalloc(ret + 1, gfp_flags);
+		if (s != NULL) {
+			memcpy(s, p, ret);
+			s[ret] = '\0';
+			*str = s;
+			return strlen(s);
+		}
+		ret = -ENOMEM;
+	}
+	*str = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
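
A sketch of how a caller might use the new helper to pull an XDR string into a freshly allocated, NUL-terminated buffer. The decoder name and the NFS4_OPAQUE_LIMIT bound are illustrative, not taken from this patch:

	#include <linux/nfs4.h>
	#include <linux/sunrpc/xdr.h>

	/* Hypothetical decoder: on success *id must be kfree()d by the caller. */
	static int decode_client_id(struct xdr_stream *xdr, char **id)
	{
		ssize_t len;

		len = xdr_stream_decode_string_dup(xdr, id, NFS4_OPAQUE_LIMIT,
						   GFP_NOFS);
		if (len < 0)
			return len;	/* -EBADMSG, -EMSGSIZE or -ENOMEM */
		return 0;
	}
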
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9a6be030ca7d..b530a2852ba8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -897,13 +897,11 @@ static void xprt_timer(struct rpc_task *task)
 		return;
 	dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
 
-	spin_lock_bh(&xprt->transport_lock);
 	if (!req->rq_reply_bytes_recvd) {
 		if (xprt->ops->timer)
 			xprt->ops->timer(xprt, task);
 	} else
 		task->tk_status = 0;
-	spin_unlock_bh(&xprt->transport_lock);
 }
 
 /**
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1ebb09e1ac4f..59e64025ed96 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -310,10 +310,7 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	struct rpcrdma_mw *mw;
 
 	while (!list_empty(&req->rl_registered)) {
-		mw = list_first_entry(&req->rl_registered,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
-
+		mw = rpcrdma_pop_mw(&req->rl_registered);
 		if (sync)
 			fmr_op_recover_mr(mw);
 		else
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 47bed5333c7f..f81dd93176c0 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -466,8 +466,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
 	struct rpcrdma_rep *rep = req->rl_reply;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	struct rpcrdma_mw *mw, *tmp;
 	struct rpcrdma_frmr *f;
+	struct rpcrdma_mw *mw;
 	int count, rc;
 
 	dprintk("RPC: %s: req %p\n", __func__, req);
@@ -534,10 +534,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * them to the free MW list.
 	 */
 unmap:
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+	while (!list_empty(&req->rl_registered)) {
+		mw = rpcrdma_pop_mw(&req->rl_registered);
 		dprintk("RPC: %s: DMA unmapping frmr %p\n",
 			__func__, &mw->frmr);
-		list_del_init(&mw->mw_list);
 		ib_dma_unmap_sg(ia->ri_device,
 				mw->mw_sg, mw->mw_nents, mw->mw_dir);
 		rpcrdma_put_mw(r_xprt, mw);
@@ -571,10 +571,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	struct rpcrdma_mw *mw;
 
 	while (!list_empty(&req->rl_registered)) {
-		mw = list_first_entry(&req->rl_registered,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
-
+		mw = rpcrdma_pop_mw(&req->rl_registered);
 		if (sync)
 			frwr_op_recover_mr(mw);
 		else
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c52e0f2ffe52..a044be2d6ad7 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 /* The client can send a request inline as long as the RPCRDMA header
  * plus the RPC call fit under the transport's inline limit. If the
  * combined call message size exceeds that limit, the client must use
- * the read chunk list for this operation.
+ * a Read chunk for this operation.
+ *
+ * A Read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
  */
 static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 				struct rpc_rqst *rqst)
 {
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct xdr_buf *xdr = &rqst->rq_snd_buf;
+	unsigned int count, remaining, offset;
+
+	if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+		return false;
+
+	if (xdr->page_len) {
+		remaining = xdr->page_len;
+		offset = xdr->page_base & ~PAGE_MASK;
+		count = 0;
+		while (remaining) {
+			remaining -= min_t(unsigned int,
+					   PAGE_SIZE - offset, remaining);
+			offset = 0;
+			if (++count > r_xprt->rx_ia.ri_max_send_sges)
+				return false;
+		}
+	}
 
-	return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+	return true;
 }
 
 /* The client can't know how large the actual reply will be. Thus it
@@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
  */
 
 static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
-	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
-	bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+	unsigned int pos, enum rpcrdma_chunktype type,
+	struct rpcrdma_mr_seg *seg)
 {
 	int len, n, p, page_base;
 	struct page **ppages;
@@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	if (len && n == RPCRDMA_MAX_SEGS)
 		goto out_overflow;
 
-	/* When encoding the read list, the tail is always sent inline */
-	if (type == rpcrdma_readch)
+	/* When encoding a Read chunk, the tail iovec contains an
+	 * XDR pad and may be omitted.
+	 */
+	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
-	/* When encoding the Write list, some servers need to see an extra
-	 * segment for odd-length Write chunks. The upper layer provides
-	 * space in the tail iovec for this purpose.
+	/* When encoding a Write chunk, some servers need to see an
+	 * extra segment for non-XDR-aligned Write chunks. The upper
+	 * layer provides space in the tail iovec that may be used
+	 * for this purpose.
 	 */
-	if (type == rpcrdma_writech && reminv_expected)
+	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
 	if (xdrbuf->tail[0].iov_len) {
-		/* the rpcrdma protocol allows us to omit any trailing
-		 * xdr pad bytes, saving the server an RDMA operation. */
-		if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
-			return n;
 		n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
 		if (n == RPCRDMA_MAX_SEGS)
 			goto out_overflow;
@@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+				     rtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -302,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 						 false, &mw);
 		if (n < 0)
 			return ERR_PTR(n);
-		list_add(&mw->mw_list, &req->rl_registered);
+		rpcrdma_push_mw(mw, &req->rl_registered);
 
 		*iptr++ = xdr_one;	/* item present */
 
@@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
-				     wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+				     wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -371,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 					   true, &mw);
 		if (n < 0)
 			return ERR_PTR(n);
-		list_add(&mw->mw_list, &req->rl_registered);
+		rpcrdma_push_mw(mw, &req->rl_registered);
 
 		iptr = xdr_encode_rdma_segment(iptr, mw);
 
@@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -437,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 					   true, &mw);
 		if (n < 0)
 			return ERR_PTR(n);
-		list_add(&mw->mw_list, &req->rl_registered);
+		rpcrdma_push_mw(mw, &req->rl_registered);
 
 		iptr = xdr_encode_rdma_segment(iptr, mw);
 
@@ -741,13 +759,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	iptr = headerp->rm_body.rm_chunks;
 	iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
 	if (IS_ERR(iptr))
-		goto out_unmap;
+		goto out_err;
 	iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
 	if (IS_ERR(iptr))
-		goto out_unmap;
+		goto out_err;
 	iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
 	if (IS_ERR(iptr))
-		goto out_unmap;
+		goto out_err;
 	hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
 
 	dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
@@ -758,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
 				       &rqst->rq_snd_buf, rtype)) {
 		iptr = ERR_PTR(-EIO);
-		goto out_unmap;
+		goto out_err;
 	}
 	return 0;
 
-out_unmap:
-	r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+out_err:
+	pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+	       PTR_ERR(iptr));
+	r_xprt->rx_stats.failed_marshal_count++;
 	return PTR_ERR(iptr);
 }
 
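To make the new SGE accounting in rpcrdma_args_inline() concrete: a page list need not start on a page boundary, so the number of send SGEs it consumes depends on both its length and its starting offset. A stand-alone sketch of the same loop, assuming 4 KiB pages and illustrative values:

	#include <stdio.h>

	#define EX_PAGE_SIZE 4096u	/* assumption: 4 KiB pages */

	int main(void)
	{
		unsigned int remaining = 9000;	/* xdr->page_len (illustrative) */
		unsigned int offset = 3000;	/* xdr->page_base & ~PAGE_MASK */
		unsigned int count = 0;

		while (remaining) {
			unsigned int chunk = EX_PAGE_SIZE - offset;

			if (chunk > remaining)
				chunk = remaining;
			remaining -= chunk;
			offset = 0;
			count++;
		}
		/* 1096 + 4096 + 3808 bytes: three pages, three SGEs */
		printf("send SGEs needed: %u\n", count);
		return 0;
	}

If the device's ri_max_send_sges is smaller than this count, the call falls back to a Read chunk instead of being sent inline.
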
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 534c178d2a7e..c717f5410776 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-int xprt_rdma_pad_optimize = 1;
+int xprt_rdma_pad_optimize = 0;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -709,10 +709,6 @@ xprt_rdma_send_request(struct rpc_task *task)
 	return 0;
 
 failed_marshal:
-	dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
-		__func__, rc);
-	if (rc == -EIO)
-		r_xprt->rx_stats.failed_marshal_count++;
 	if (rc != -ENOTCONN)
 		return rc;
 drop_connection:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11d07748f699..81cd31acf690 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -54,6 +54,7 @@
 #include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
 #include <linux/module.h> /* try_module_get()/module_put() */
+#include <rdma/ib_cm.h>
 
 #include "xprt_rdma.h"
 
@@ -208,6 +209,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 
 	/* Default settings for RPC-over-RDMA Version One */
 	r_xprt->rx_ia.ri_reminv_expected = false;
+	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 
@@ -215,6 +217,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 	    pmsg->cp_magic == rpcrdma_cmp_magic &&
 	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
 		r_xprt->rx_ia.ri_reminv_expected = true;
+		r_xprt->rx_ia.ri_implicit_roundup = true;
 		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
 		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
 	}
@@ -277,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		connstate = -ENETDOWN;
 		goto connected;
 	case RDMA_CM_EVENT_REJECTED:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+		pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
+			sap, rpc_get_port(sap), ia->ri_device->name,
+			rdma_reject_msg(id, event->status));
+#endif
 		connstate = -ECONNREFUSED;
+		if (event->status == IB_CM_REJ_STALE_CONN)
+			connstate = -EAGAIN;
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
@@ -486,18 +496,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
  */
 int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 		  struct rpcrdma_create_data_internal *cdata)
 {
 	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+	unsigned int max_qp_wr, max_sge;
 	struct ib_cq *sendcq, *recvcq;
-	unsigned int max_qp_wr;
 	int rc;
 
-	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
-		dprintk("RPC: %s: insufficient sge's available\n",
-			__func__);
+	max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
 		return -ENOMEM;
 	}
+	ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
 
 	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
 		dprintk("RPC: %s: insufficient wqe's available\n",
522 ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 533 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
523 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 534 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
524 ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ 535 ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
525 ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; 536 ep->rep_attr.cap.max_send_sge = max_sge;
526 ep->rep_attr.cap.max_recv_sge = 1; 537 ep->rep_attr.cap.max_recv_sge = 1;
527 ep->rep_attr.cap.max_inline_data = 0; 538 ep->rep_attr.cap.max_inline_data = 0;
528 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 539 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -640,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 int
 rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
+	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+						   rx_ia);
 	struct rdma_cm_id *id, *old;
+	struct sockaddr *sap;
+	unsigned int extras;
 	int rc = 0;
-	int retry_count = 0;
 
 	if (ep->rep_connected != 0) {
-		struct rpcrdma_xprt *xprt;
 retry:
 		dprintk("RPC: %s: reconnecting...\n", __func__);
 
 		rpcrdma_ep_disconnect(ep, ia);
 
-		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-		id = rpcrdma_create_id(xprt, ia,
-			(struct sockaddr *)&xprt->rx_data.addr);
+		sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+		id = rpcrdma_create_id(r_xprt, ia, sap);
 		if (IS_ERR(id)) {
 			rc = -EHOSTUNREACH;
 			goto out;
@@ -708,51 +720,18 @@ retry:
 	}
 
 	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
-
-	/*
-	 * Check state. A non-peer reject indicates no listener
-	 * (ECONNREFUSED), which may be a transient state. All
-	 * others indicate a transport condition which has already
-	 * undergone a best-effort.
-	 */
-	if (ep->rep_connected == -ECONNREFUSED &&
-	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
-		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
-		goto retry;
-	}
 	if (ep->rep_connected <= 0) {
-		/* Sometimes, the only way to reliably connect to remote
-		 * CMs is to use same nonzero values for ORD and IRD. */
-		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
-		    (ep->rep_remote_cma.responder_resources == 0 ||
-		     ep->rep_remote_cma.initiator_depth !=
-		      ep->rep_remote_cma.responder_resources)) {
-			if (ep->rep_remote_cma.responder_resources == 0)
-				ep->rep_remote_cma.responder_resources = 1;
-			ep->rep_remote_cma.initiator_depth =
-				ep->rep_remote_cma.responder_resources;
+		if (ep->rep_connected == -EAGAIN)
 			goto retry;
-		}
 		rc = ep->rep_connected;
-	} else {
-		struct rpcrdma_xprt *r_xprt;
-		unsigned int extras;
-
-		dprintk("RPC: %s: connected\n", __func__);
-
-		r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-		extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
-
-		if (extras) {
-			rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
-			if (rc) {
-				pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
-					__func__, rc);
-				rc = 0;
-			}
-		}
+		goto out;
 	}
 
+	dprintk("RPC: %s: connected\n", __func__);
+	extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
+	if (extras)
+		rpcrdma_ep_post_extra_recv(r_xprt, extras);
+
 out:
 	if (rc)
 		ep->rep_connected = rc;
@@ -797,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
 
 	spin_lock(&buf->rb_recovery_lock);
 	while (!list_empty(&buf->rb_stale_mrs)) {
-		mw = list_first_entry(&buf->rb_stale_mrs,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
+		mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
 		spin_unlock(&buf->rb_recovery_lock);
 
 		dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
@@ -817,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
 	spin_lock(&buf->rb_recovery_lock);
-	list_add(&mw->mw_list, &buf->rb_stale_mrs);
+	rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
 	spin_unlock(&buf->rb_recovery_lock);
 
 	schedule_delayed_work(&buf->rb_recovery_worker, 0);
@@ -1093,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 	struct rpcrdma_mw *mw = NULL;
 
 	spin_lock(&buf->rb_mwlock);
-	if (!list_empty(&buf->rb_mws)) {
-		mw = list_first_entry(&buf->rb_mws,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
-	}
+	if (!list_empty(&buf->rb_mws))
+		mw = rpcrdma_pop_mw(&buf->rb_mws);
 	spin_unlock(&buf->rb_mwlock);
 
 	if (!mw)
@@ -1120,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
 	spin_lock(&buf->rb_mwlock);
-	list_add_tail(&mw->mw_list, &buf->rb_mws);
+	rpcrdma_push_mw(mw, &buf->rb_mws);
 	spin_unlock(&buf->rb_mwlock);
 }
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e35efd4ac1e4..171a35116de9 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,7 +74,9 @@ struct rpcrdma_ia {
 	unsigned int		ri_max_frmr_depth;
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
+	unsigned int		ri_max_send_sges;
 	bool			ri_reminv_expected;
+	bool			ri_implicit_roundup;
 	enum ib_mr_type		ri_mrtype;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
@@ -303,15 +305,19 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
 	char		*mr_offset;	/* kva if no page, else offset */
 };
 
-/* Reserve enough Send SGEs to send a maximum size inline request:
+/* The Send SGE array is provisioned to send a maximum size
+ * inline request:
  * - RPC-over-RDMA header
  * - xdr_buf head iovec
- * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages
+ * - RPCRDMA_MAX_INLINE bytes, in pages
  * - xdr_buf tail iovec
+ *
+ * The actual number of array elements consumed by each RPC
+ * depends on the device's max_sge limit.
  */
 enum {
-	RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
-	RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
+	RPCRDMA_MIN_SEND_SGES = 3,
+	RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
 	RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
 };
 
@@ -348,6 +354,22 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
 	return rqst->rq_xprtdata;
 }
 
+static inline void
+rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+{
+	list_add_tail(&mw->mw_list, list);
+}
+
+static inline struct rpcrdma_mw *
+rpcrdma_pop_mw(struct list_head *list)
+{
+	struct rpcrdma_mw *mw;
+
+	mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
+	list_del(&mw->mw_list);
+	return mw;
+}
+
 /*
  * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
  *   inline requests/replies, and client/server credits.
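
To make the provisioning comment above concrete: one SGE is reserved for the transport header, one for the xdr_buf head, RPCRDMA_MAX_PAGE_SGES for the page list and one for the tail, while RPCRDMA_MIN_SEND_SGES = 3 keeps header, head and tail sendable even on constrained devices. A worked example under stated assumptions (not necessarily the kernel's actual constants):

	/* Assumptions: PAGE_SHIFT = 12 and an inline limit of 8 KiB. */
	enum {
		EX_MAX_INLINE    = 8192,
		EX_PAGE_SHIFT    = 12,
		EX_MAX_PAGE_SGES = EX_MAX_INLINE >> EX_PAGE_SHIFT,	/* = 2 */
		EX_MAX_SEND_SGES = 1 + 1 + EX_MAX_PAGE_SGES + 1,	/* = 5 */
	};
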
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956c7bce80d1..16aff8ddc16f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
 #include "sunrpc.h"
 
 static void xs_close(struct rpc_xprt *xprt);
+static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
+		struct socket *sock);
 
 /*
  * xprtsock tunables
@@ -666,6 +668,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	if (task->tk_flags & RPC_TASK_SENT)
 		zerocopy = false;
 
+	if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
+		xs_tcp_set_socket_timeouts(xprt, transport->sock);
+
 	/* Continue transmitting the packet/record. We must be careful
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
@@ -1734,7 +1739,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
  */
 static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
 {
+	spin_lock_bh(&xprt->transport_lock);
 	xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
+	spin_unlock_bh(&xprt->transport_lock);
 }
 
 static unsigned short xs_get_random_port(void)
@@ -2235,6 +2242,66 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
 	xs_reset_transport(transport);
 }
 
+static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
+		struct socket *sock)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	unsigned int keepidle;
+	unsigned int keepcnt;
+	unsigned int opt_on = 1;
+	unsigned int timeo;
+
+	spin_lock_bh(&xprt->transport_lock);
+	keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
+	keepcnt = xprt->timeout->to_retries + 1;
+	timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
+		(xprt->timeout->to_retries + 1);
+	clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
+	spin_unlock_bh(&xprt->transport_lock);
+
+	/* TCP Keepalive options */
+	kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+			(char *)&opt_on, sizeof(opt_on));
+	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
+			(char *)&keepidle, sizeof(keepidle));
+	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
+			(char *)&keepidle, sizeof(keepidle));
+	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
+			(char *)&keepcnt, sizeof(keepcnt));
+
+	/* TCP user timeout (see RFC5482) */
+	kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+			(char *)&timeo, sizeof(timeo));
+}
+
+static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
+		unsigned long connect_timeout,
+		unsigned long reconnect_timeout)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	struct rpc_timeout to;
+	unsigned long initval;
+
+	spin_lock_bh(&xprt->transport_lock);
+	if (reconnect_timeout < xprt->max_reconnect_timeout)
+		xprt->max_reconnect_timeout = reconnect_timeout;
+	if (connect_timeout < xprt->connect_timeout) {
+		memcpy(&to, xprt->timeout, sizeof(to));
+		initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
+		/* Arbitrary lower limit */
+		if (initval < XS_TCP_INIT_REEST_TO << 1)
+			initval = XS_TCP_INIT_REEST_TO << 1;
+		to.to_initval = initval;
+		to.to_maxval = initval;
+		memcpy(&transport->tcp_timeout, &to,
+			sizeof(transport->tcp_timeout));
+		xprt->timeout = &transport->tcp_timeout;
+		xprt->connect_timeout = connect_timeout;
+	}
+	set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
+	spin_unlock_bh(&xprt->transport_lock);
+}
+
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2242,22 +2309,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 
 	if (!transport->inet) {
 		struct sock *sk = sock->sk;
-		unsigned int keepidle = xprt->timeout->to_initval / HZ;
-		unsigned int keepcnt = xprt->timeout->to_retries + 1;
-		unsigned int opt_on = 1;
-		unsigned int timeo;
 		unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
-		/* TCP Keepalive options */
-		kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-				(char *)&opt_on, sizeof(opt_on));
-		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
-				(char *)&keepidle, sizeof(keepidle));
-		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-				(char *)&keepidle, sizeof(keepidle));
-		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
-				(char *)&keepcnt, sizeof(keepcnt));
-
 		/* Avoid temporary address, they are bad for long-lived
 		 * connections such as NFS mounts.
 		 * RFC4941, section 3.6 suggests that:
@@ -2268,11 +2321,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
 				(char *)&addr_pref, sizeof(addr_pref));
 
-		/* TCP user timeout (see RFC5482) */
-		timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
-			(xprt->timeout->to_retries + 1);
-		kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
-				(char *)&timeo, sizeof(timeo));
+		xs_tcp_set_socket_timeouts(xprt, sock);
 
 		write_lock_bh(&sk->sk_callback_lock);
 
@@ -2721,6 +2770,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_tcp_shutdown,
 	.destroy		= xs_destroy,
+	.set_connect_timeout	= xs_tcp_set_connect_timeout,
 	.print_stats		= xs_tcp_print_stats,
 	.enable_swap		= xs_enable_swap,
 	.disable_swap		= xs_disable_swap,
@@ -3007,6 +3057,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	xprt->timeout = &xs_tcp_default_timeout;
 
 	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+	xprt->connect_timeout = xprt->timeout->to_initval *
+			(xprt->timeout->to_retries + 1);
 
 	INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
 	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
@@ -3209,7 +3261,9 @@ static int param_set_uint_minmax(const char *val,
 	if (!val)
 		return -EINVAL;
 	ret = kstrtouint(val, 0, &num);
-	if (ret == -EINVAL || num < min || num > max)
+	if (ret)
+		return ret;
+	if (num < min || num > max)
 		return -EINVAL;
 	*((unsigned int *)kp->arg) = num;
 	return 0;
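
A worked example of the arithmetic in xs_tcp_set_socket_timeouts() above, assuming the default TCP timeout (to_initval = 60 seconds, to_retries = 2): keepidle becomes 60, keepcnt becomes 3, and TCP_USER_TIMEOUT becomes 60000 * 3 = 180000 ms, so an unacknowledged connection is torn down after three minutes. A stand-alone sketch (HZ cancels out of the computation; the values are the assumed defaults):

	#include <stdio.h>

	int main(void)
	{
		unsigned int to_initval_ms = 60000;	/* assumption: 60 s */
		unsigned int to_retries = 2;		/* assumption */

		unsigned int keepidle = to_initval_ms / 1000;
		unsigned int keepcnt = to_retries + 1;
		unsigned int timeo = to_initval_ms * (to_retries + 1);

		printf("TCP_KEEPIDLE=%u TCP_KEEPCNT=%u TCP_USER_TIMEOUT=%ums\n",
		       keepidle, keepcnt, timeo);	/* 60 3 180000ms */
		return 0;
	}
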