author     Linus Torvalds <torvalds@linux-foundation.org>  2017-03-01 19:10:30 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-03-01 19:10:30 -0500
commit     8f03cf50bc9443e92d6e54ac4d599357d6cb7cbb
tree       0c38aab3a4c24d06fb05376b651157627bc1669d /net
parent     25c4e6c3f0c14d1575aa488ff4ca47e045ae51a0
parent     ed92d8c137b7794c2c2aa14479298b9885967607
Merge tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
"Highlights include:
Stable bugfixes:
- NFSv4: Fix memory and state leak in _nfs4_open_and_get_state
- xprtrdma: Fix Read chunk padding
- xprtrdma: Per-connection pad optimization
- xprtrdma: Disable pad optimization by default
- xprtrdma: Reduce required number of send SGEs
- nlm: Ensure callback code also checks that the files match
- pNFS/flexfiles: If the layout is invalid, it must be updated before
retrying
- NFSv4: Fix reboot recovery in copy offload
- Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION
replies to OP_SEQUENCE"
- NFSv4: fix getacl head length estimation
- NFSv4: fix getacl ERANGE for some ACL buffer sizes
Features:
- Add and use dprintk_cont macros
- Various cleanups to NFS v4.x to reduce code duplication and
complexity
- Remove unused cr_magic related code
- Improvements to sunrpc "read from buffer" code
- Clean up sunrpc timeout code and allow changing TCP timeout
parameters
- Remove duplicate mw_list management code in xprtrdma
- Add generic functions for encoding and decoding xdr streams
Bugfixes:
- Clean up nfs_show_mountd_netid
- Make layoutreturn_ops static and use NULL instead of 0 to fix
sparse warnings
- Properly handle -ERESTARTSYS in nfs_rename()
- Check if register_shrinker() failed during rpcauth_init()
- Properly clean up procfs/pipefs entries
- Various NFS over RDMA related fixes
- Silence uninitialized variable warning in sunrpc"
* tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (64 commits)
NFSv4: fix getacl ERANGE for some ACL buffer sizes
NFSv4: fix getacl head length estimation
Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE"
NFSv4: Fix reboot recovery in copy offload
pNFS/flexfiles: If the layout is invalid, it must be updated before retrying
NFSv4: Clean up owner/group attribute decode
SUNRPC: Add a helper function xdr_stream_decode_string_dup()
NFSv4: Remove bogus "struct nfs_client" argument from decode_ace()
NFSv4: Fix the underestimation of delegation XDR space reservation
NFSv4: Replace callback string decode function with a generic
NFSv4: Replace the open coded decode_opaque_inline() with the new generic
NFSv4: Replace ad-hoc xdr encode/decode helpers with xdr_stream_* generics
SUNRPC: Add generic helpers for xdr_stream encode/decode
sunrpc: silence uninitialized variable warning
nlm: Ensure callback code also checks that the files match
sunrpc: Allow xprt->ops->timer method to sleep
xprtrdma: Refactor management of mw_list field
xprtrdma: Handle stale connection rejection
xprtrdma: Properly recover FRWRs with in-flight FASTREG WRs
xprtrdma: Shrink send SGEs array
...
Diffstat (limited to 'net')
-rw-r--r--  net/sunrpc/auth.c                |  15
-rw-r--r--  net/sunrpc/auth_null.c           |   3
-rw-r--r--  net/sunrpc/auth_unix.c           |  18
-rw-r--r--  net/sunrpc/cache.c               |  68
-rw-r--r--  net/sunrpc/clnt.c                |  51
-rw-r--r--  net/sunrpc/debugfs.c             |  35
-rw-r--r--  net/sunrpc/svcauth_unix.c        |   4
-rw-r--r--  net/sunrpc/xdr.c                 |  34
-rw-r--r--  net/sunrpc/xprt.c                |   2
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c    |   5
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c   |  11
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c   |  82
-rw-r--r--  net/sunrpc/xprtrdma/transport.c  |   6
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c      |  96
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h  |  30
-rw-r--r--  net/sunrpc/xprtsock.c            |  94
16 files changed, 304 insertions, 250 deletions
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2bff63a73cf8..a1ee933e3029 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -464,8 +464,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 		 * Note that the cred_unused list must be time-ordered.
 		 */
 		if (time_in_range(cred->cr_expire, expired, jiffies) &&
-		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
+			freed = SHRINK_STOP;
 			break;
+		}
 
 		list_del_init(&cred->cr_lru);
 		number_cred_unused--;
@@ -520,7 +522,7 @@ static unsigned long
 rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 
 {
-	return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+	return number_cred_unused * sysctl_vfs_cache_pressure / 100;
 }
 
 static void
@@ -646,9 +648,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 	cred->cr_auth = auth;
 	cred->cr_ops = ops;
 	cred->cr_expire = jiffies;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	cred->cr_magic = RPCAUTH_CRED_MAGIC;
-#endif
 	cred->cr_uid = acred->uid;
 }
 EXPORT_SYMBOL_GPL(rpcauth_init_cred);
@@ -876,8 +875,12 @@ int __init rpcauth_init_module(void)
 	err = rpc_init_generic_auth();
 	if (err < 0)
 		goto out2;
-	register_shrinker(&rpc_cred_shrinker);
+	err = register_shrinker(&rpc_cred_shrinker);
+	if (err < 0)
+		goto out3;
 	return 0;
+out3:
+	rpc_destroy_generic_auth();
 out2:
 	rpc_destroy_authunix();
 out1:
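The rpcauth_cache_shrink_count() change above is subtle: in the old form, the integer division runs first, so a cache holding fewer than 100 unused credentials always reported zero freeable objects and the shrinker never fired on small caches. Multiplying before dividing preserves the small-cache case. A standalone demonstration (ordinary userspace C with illustrative values, not kernel code):

#include <stdio.h>

/* Demo of the shrink_count fix: divide-first truncates to zero for
 * any count below 100; multiply-first keeps the precision. */
int main(void)
{
	unsigned long number_cred_unused = 42;   /* fewer than 100 */
	unsigned long vfs_cache_pressure = 100;  /* default sysctl value */

	unsigned long old_count =
		(number_cred_unused / 100) * vfs_cache_pressure;
	unsigned long new_count =
		number_cred_unused * vfs_cache_pressure / 100;

	printf("old: %lu freeable\n", old_count);  /* prints 0 */
	printf("new: %lu freeable\n", new_count);  /* prints 42 */
	return 0;
}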
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 4d17376b2acb..5f3d527dff65 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -139,7 +139,4 @@ struct rpc_cred null_cred = {
 	.cr_ops = &null_credops,
 	.cr_count = ATOMIC_INIT(1),
 	.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	.cr_magic = RPCAUTH_CRED_MAGIC,
-#endif
 };
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 306fc0f54596..82337e1ec9cd 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -14,12 +14,10 @@
 #include <linux/sunrpc/auth.h>
 #include <linux/user_namespace.h>
 
-#define NFS_NGROUPS 16
-
 struct unx_cred {
 	struct rpc_cred uc_base;
 	kgid_t uc_gid;
-	kgid_t uc_gids[NFS_NGROUPS];
+	kgid_t uc_gids[UNX_NGROUPS];
 };
 #define uc_uid uc_base.cr_uid
 
@@ -82,13 +80,13 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
 
 	if (acred->group_info != NULL)
 		groups = acred->group_info->ngroups;
-	if (groups > NFS_NGROUPS)
-		groups = NFS_NGROUPS;
+	if (groups > UNX_NGROUPS)
+		groups = UNX_NGROUPS;
 
 	cred->uc_gid = acred->gid;
 	for (i = 0; i < groups; i++)
 		cred->uc_gids[i] = acred->group_info->gid[i];
-	if (i < NFS_NGROUPS)
+	if (i < UNX_NGROUPS)
 		cred->uc_gids[i] = INVALID_GID;
 
 	return &cred->uc_base;
@@ -132,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
 
 	if (acred->group_info != NULL)
 		groups = acred->group_info->ngroups;
-	if (groups > NFS_NGROUPS)
-		groups = NFS_NGROUPS;
+	if (groups > UNX_NGROUPS)
+		groups = UNX_NGROUPS;
 	for (i = 0; i < groups ; i++)
 		if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
 			return 0;
-	if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
+	if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
 		return 0;
 	return 1;
 }
@@ -166,7 +164,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
 	*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
 	*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
 	hold = p++;
-	for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
+	for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
 		*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
 	*hold = htonl(p - hold - 1);	/* gid array length */
 	*base = htonl((p - base - 1) << 2);	/* cred length */
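Note that UNX_NGROUPS itself is not defined anywhere in this diffstat, which is limited to net/; presumably the series adds it to a shared sunrpc header (include/linux/sunrpc/auth.h or similar) with the same value the old private macro had, since a cap of 16 supplementary gids is a property of the AUTH_UNIX wire format rather than of these files. A hedged sketch of the assumed definition:

/* Assumed new definition in a shared sunrpc header, outside this
 * net/-only diffstat. AUTH_UNIX aka AUTH_SYS carries at most 16
 * supplementary gids on the wire, so the value should match the
 * old local NFS_NGROUPS. */
#define UNX_NGROUPS	16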
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index d8639da06d9c..79d55d949d9a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -728,7 +728,7 @@ void cache_clean_deferred(void *owner)
 /*
  * communicate with user-space
  *
- * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
+ * We have a magic /proc file - /proc/net/rpc/<cachename>/channel.
  * On read, you get a full request, or block.
  * On write, an update request is processed.
  * Poll works if anything to read, and always allows write.
@@ -1283,7 +1283,7 @@ EXPORT_SYMBOL_GPL(qword_get);
 
 
 /*
- * support /proc/sunrpc/cache/$CACHENAME/content
+ * support /proc/net/rpc/$CACHENAME/content
  * as a seqfile.
  * We call ->cache_show passing NULL for the item to
  * get a header, then pass each real item in the cache
@@ -1438,20 +1438,11 @@ static ssize_t read_flush(struct file *file, char __user *buf,
 			  struct cache_detail *cd)
 {
 	char tbuf[22];
-	unsigned long p = *ppos;
 	size_t len;
 
-	snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time));
-	len = strlen(tbuf);
-	if (p >= len)
-		return 0;
-	len -= p;
-	if (len > count)
-		len = count;
-	if (copy_to_user(buf, (void*)(tbuf+p), len))
-		return -EFAULT;
-	*ppos += len;
-	return len;
+	len = snprintf(tbuf, sizeof(tbuf), "%lu\n",
+			convert_to_wallclock(cd->flush_time));
+	return simple_read_from_buffer(buf, count, ppos, tbuf, len);
 }
 
 static ssize_t write_flush(struct file *file, const char __user *buf,
@@ -1611,21 +1602,12 @@ static const struct file_operations cache_flush_operations_procfs = {
 	.llseek		= no_llseek,
 };
 
-static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
+static void remove_cache_proc_entries(struct cache_detail *cd)
 {
-	struct sunrpc_net *sn;
-
-	if (cd->u.procfs.proc_ent == NULL)
-		return;
-	if (cd->u.procfs.flush_ent)
-		remove_proc_entry("flush", cd->u.procfs.proc_ent);
-	if (cd->u.procfs.channel_ent)
-		remove_proc_entry("channel", cd->u.procfs.proc_ent);
-	if (cd->u.procfs.content_ent)
-		remove_proc_entry("content", cd->u.procfs.proc_ent);
-	cd->u.procfs.proc_ent = NULL;
-	sn = net_generic(net, sunrpc_net_id);
-	remove_proc_entry(cd->name, sn->proc_net_rpc);
+	if (cd->procfs) {
+		proc_remove(cd->procfs);
+		cd->procfs = NULL;
+	}
 }
 
 #ifdef CONFIG_PROC_FS
@@ -1635,38 +1617,30 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 	struct sunrpc_net *sn;
 
 	sn = net_generic(net, sunrpc_net_id);
-	cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
-	if (cd->u.procfs.proc_ent == NULL)
+	cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
+	if (cd->procfs == NULL)
 		goto out_nomem;
-	cd->u.procfs.channel_ent = NULL;
-	cd->u.procfs.content_ent = NULL;
 
 	p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
-			     cd->u.procfs.proc_ent,
-			     &cache_flush_operations_procfs, cd);
-	cd->u.procfs.flush_ent = p;
+			     cd->procfs, &cache_flush_operations_procfs, cd);
 	if (p == NULL)
 		goto out_nomem;
 
 	if (cd->cache_request || cd->cache_parse) {
 		p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
-				     cd->u.procfs.proc_ent,
-				     &cache_file_operations_procfs, cd);
-		cd->u.procfs.channel_ent = p;
+				     cd->procfs, &cache_file_operations_procfs, cd);
 		if (p == NULL)
 			goto out_nomem;
 	}
 	if (cd->cache_show) {
 		p = proc_create_data("content", S_IFREG|S_IRUSR,
-				     cd->u.procfs.proc_ent,
-				     &content_file_operations_procfs, cd);
-		cd->u.procfs.content_ent = p;
+				     cd->procfs, &content_file_operations_procfs, cd);
 		if (p == NULL)
 			goto out_nomem;
 	}
 	return 0;
 out_nomem:
-	remove_cache_proc_entries(cd, net);
+	remove_cache_proc_entries(cd);
 	return -ENOMEM;
 }
 #else /* CONFIG_PROC_FS */
@@ -1695,7 +1669,7 @@ EXPORT_SYMBOL_GPL(cache_register_net);
 
 void cache_unregister_net(struct cache_detail *cd, struct net *net)
 {
-	remove_cache_proc_entries(cd, net);
+	remove_cache_proc_entries(cd);
 	sunrpc_destroy_cache_detail(cd);
 }
 EXPORT_SYMBOL_GPL(cache_unregister_net);
@@ -1854,15 +1828,17 @@ int sunrpc_cache_register_pipefs(struct dentry *parent,
 	struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd);
 	if (IS_ERR(dir))
 		return PTR_ERR(dir);
-	cd->u.pipefs.dir = dir;
+	cd->pipefs = dir;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
 
 void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
 {
-	rpc_remove_cache_dir(cd->u.pipefs.dir);
-	cd->u.pipefs.dir = NULL;
+	if (cd->pipefs) {
+		rpc_remove_cache_dir(cd->pipefs);
+		cd->pipefs = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
 
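The read_flush() rewrite above replaces roughly ten lines of hand-rolled *ppos bookkeeping with simple_read_from_buffer(), which clamps the requested range against the available bytes, performs the copy_to_user(), and advances the offset itself. The same pattern suits any fixed-size, format-then-read proc file; a minimal sketch (the file and value shown are hypothetical, not part of this patch):

#include <linux/fs.h>
#include <linux/kernel.h>

/* Hypothetical proc read method using the simple_read_from_buffer()
 * pattern adopted by read_flush(): format into a small stack buffer,
 * then let the helper handle *ppos clamping and the user copy. */
static ssize_t demo_read(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	char tbuf[22];
	size_t len;

	len = snprintf(tbuf, sizeof(tbuf), "%lu\n", 42UL);
	return simple_read_from_buffer(buf, count, ppos, tbuf, len);
}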
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1dc9f3bac099..52da3ce54bb5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1453,21 +1453,6 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
 EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
 
 /**
- * rpc_get_timeout - Get timeout for transport in units of HZ
- * @clnt: RPC client to query
- */
-unsigned long rpc_get_timeout(struct rpc_clnt *clnt)
-{
-	unsigned long ret;
-
-	rcu_read_lock();
-	ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval;
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(rpc_get_timeout);
-
-/**
  * rpc_force_rebind - force transport to check that remote port is unchanged
  * @clnt: client to rebind
  *
@@ -2699,6 +2684,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 {
 	struct rpc_xprt_switch *xps;
 	struct rpc_xprt *xprt;
+	unsigned long connect_timeout;
 	unsigned long reconnect_timeout;
 	unsigned char resvport;
 	int ret = 0;
@@ -2711,6 +2697,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 		return -EAGAIN;
 	}
 	resvport = xprt->resvport;
+	connect_timeout = xprt->connect_timeout;
 	reconnect_timeout = xprt->max_reconnect_timeout;
 	rcu_read_unlock();
 
@@ -2720,7 +2707,10 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 		goto out_put_switch;
 	}
 	xprt->resvport = resvport;
-	xprt->max_reconnect_timeout = reconnect_timeout;
+	if (xprt->ops->set_connect_timeout != NULL)
+		xprt->ops->set_connect_timeout(xprt,
+				connect_timeout,
+				reconnect_timeout);
 
 	rpc_xprt_switch_set_roundrobin(xps);
 	if (setup) {
@@ -2737,26 +2727,39 @@ out_put_switch:
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
 
+struct connect_timeout_data {
+	unsigned long connect_timeout;
+	unsigned long reconnect_timeout;
+};
+
 static int
-rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt,
 		struct rpc_xprt *xprt,
 		void *data)
 {
-	unsigned long timeout = *((unsigned long *)data);
+	struct connect_timeout_data *timeo = data;
 
-	if (timeout < xprt->max_reconnect_timeout)
-		xprt->max_reconnect_timeout = timeout;
+	if (xprt->ops->set_connect_timeout)
+		xprt->ops->set_connect_timeout(xprt,
+				timeo->connect_timeout,
+				timeo->reconnect_timeout);
 	return 0;
 }
 
 void
-rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+rpc_set_connect_timeout(struct rpc_clnt *clnt,
+		unsigned long connect_timeout,
+		unsigned long reconnect_timeout)
 {
+	struct connect_timeout_data timeout = {
+		.connect_timeout = connect_timeout,
+		.reconnect_timeout = reconnect_timeout,
+	};
 	rpc_clnt_iterate_for_each_xprt(clnt,
-			rpc_xprt_cap_max_reconnect_timeout,
-			&timeo);
+			rpc_xprt_set_connect_timeout,
+			&timeout);
 }
-EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
 
 void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
 {
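Because rpc_clnt_iterate_for_each_xprt() hands each callback a single void *data cookie, carrying two timeout values requires bundling them in the new connect_timeout_data struct, as the hunk above shows. A hedged sketch of how an upper layer might call the replacement API (the function and the timeout values are illustrative; the real NFS callers live outside this net/-only diffstat):

#include <linux/jiffies.h>
#include <linux/sunrpc/clnt.h>

/* Illustrative caller: set both the connect and the maximum
 * reconnect backoff timeout on every transport in the client's
 * transport switch, via the new rpc_set_connect_timeout(). */
static void demo_tune_transports(struct rpc_clnt *clnt)
{
	unsigned long connect_timeout = 90 * HZ;	/* illustrative */
	unsigned long reconnect_timeout = 15 * HZ;	/* illustrative */

	rpc_set_connect_timeout(clnt, connect_timeout, reconnect_timeout);
}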
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e7b4d93566df..c8fd0b6c1618 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -16,11 +16,6 @@ static struct dentry *rpc_xprt_dir;
 
 unsigned int rpc_inject_disconnect;
 
-struct rpc_clnt_iter {
-	struct rpc_clnt *clnt;
-	loff_t pos;
-};
-
 static int
 tasks_show(struct seq_file *f, void *v)
 {
@@ -47,12 +42,10 @@ static void *
 tasks_start(struct seq_file *f, loff_t *ppos)
 	__acquires(&clnt->cl_lock)
 {
-	struct rpc_clnt_iter *iter = f->private;
+	struct rpc_clnt *clnt = f->private;
 	loff_t pos = *ppos;
-	struct rpc_clnt *clnt = iter->clnt;
 	struct rpc_task *task;
 
-	iter->pos = pos + 1;
 	spin_lock(&clnt->cl_lock);
 	list_for_each_entry(task, &clnt->cl_tasks, tk_task)
 		if (pos-- == 0)
@@ -63,12 +56,10 @@ tasks_start(struct seq_file *f, loff_t *ppos)
 static void *
 tasks_next(struct seq_file *f, void *v, loff_t *pos)
 {
-	struct rpc_clnt_iter *iter = f->private;
-	struct rpc_clnt *clnt = iter->clnt;
+	struct rpc_clnt *clnt = f->private;
 	struct rpc_task *task = v;
 	struct list_head *next = task->tk_task.next;
 
-	++iter->pos;
 	++*pos;
 
 	/* If there's another task on list, return it */
@@ -81,9 +72,7 @@ static void
 tasks_stop(struct seq_file *f, void *v)
 	__releases(&clnt->cl_lock)
 {
-	struct rpc_clnt_iter *iter = f->private;
-	struct rpc_clnt *clnt = iter->clnt;
-
+	struct rpc_clnt *clnt = f->private;
 	spin_unlock(&clnt->cl_lock);
 }
 
@@ -96,17 +85,13 @@ static const struct seq_operations tasks_seq_operations = {
 
 static int tasks_open(struct inode *inode, struct file *filp)
 {
-	int ret = seq_open_private(filp, &tasks_seq_operations,
-				   sizeof(struct rpc_clnt_iter));
-
+	int ret = seq_open(filp, &tasks_seq_operations);
 	if (!ret) {
 		struct seq_file *seq = filp->private_data;
-		struct rpc_clnt_iter *iter = seq->private;
-
-		iter->clnt = inode->i_private;
+		struct rpc_clnt *clnt = seq->private = inode->i_private;
 
-		if (!atomic_inc_not_zero(&iter->clnt->cl_count)) {
-			seq_release_private(inode, filp);
+		if (!atomic_inc_not_zero(&clnt->cl_count)) {
+			seq_release(inode, filp);
 			ret = -EINVAL;
 		}
 	}
@@ -118,10 +103,10 @@ static int
 tasks_release(struct inode *inode, struct file *filp)
 {
 	struct seq_file *seq = filp->private_data;
-	struct rpc_clnt_iter *iter = seq->private;
+	struct rpc_clnt *clnt = seq->private;
 
-	rpc_release_client(iter->clnt);
-	return seq_release_private(inode, filp);
+	rpc_release_client(clnt);
+	return seq_release(inode, filp);
 }
 
 static const struct file_operations tasks_fops = {
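The debugfs change above can drop rpc_clnt_iter because the struct carried only one live member: its pos field was written but never read, leaving a bare rpc_clnt pointer that fits directly in seq->private. This is the standard slimming from seq_open_private() to seq_open() whenever per-open state is a single pointer; a generic sketch (the demo_* names are hypothetical, and the seq_operations methods are elided):

#include <linux/fs.h>
#include <linux/seq_file.h>

/* Hypothetical single-pointer seq_file open/release pair, mirroring
 * the tasks_open()/tasks_release() change: no wrapper allocation,
 * the pointer rides in seq->private directly. */
static const struct seq_operations demo_seq_operations;
	/* start/next/stop/show elided for brevity */

static int demo_open(struct inode *inode, struct file *filp)
{
	int ret = seq_open(filp, &demo_seq_operations);

	if (!ret) {
		struct seq_file *seq = filp->private_data;

		seq->private = inode->i_private;  /* no extra allocation */
	}
	return ret;
}

static int demo_release(struct inode *inode, struct file *filp)
{
	return seq_release(inode, filp);  /* frees only the seq_file */
}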
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 64af4f034de6..f81eaa8e0888 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -403,7 +403,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt)
 /****************************************************************************
  * auth.unix.gid cache
  * simple cache to map a UID to a list of GIDs
- * because AUTH_UNIX aka AUTH_SYS has a max of 16
+ * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS
  */
 #define	GID_HASHBITS	8
 #define	GID_HASHMAX	(1<<GID_HASHBITS)
@@ -810,7 +810,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
 	cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
 	slen = svc_getnl(argv);			/* gids length */
-	if (slen > 16 || (len -= (slen + 2)*4) < 0)
+	if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
 		goto badcred;
 	cred->cr_group_info = groups_alloc(slen);
 	if (cred->cr_group_info == NULL)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 7f1071e103ca..1f7082144e01 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1518,3 +1518,37 @@ out:
 }
 EXPORT_SYMBOL_GPL(xdr_process_buf);
 
+/**
+ * xdr_stream_decode_string_dup - Decode and duplicate variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store pointer to string
+ * @maxlen: maximum acceptable string length
+ * @gfp_flags: GFP mask to use
+ *
+ * Return values:
+ *   On success, returns length of NUL-terminated string stored in *@str
+ *   %-EBADMSG on XDR buffer overflow
+ *   %-EMSGSIZE if the size of the string would exceed @maxlen
+ *   %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
+		size_t maxlen, gfp_t gfp_flags)
+{
+	void *p;
+	ssize_t ret;
+
+	ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
+	if (ret > 0) {
+		char *s = kmalloc(ret + 1, gfp_flags);
+		if (s != NULL) {
+			memcpy(s, p, ret);
+			s[ret] = '\0';
+			*str = s;
+			return strlen(s);
+		}
+		ret = -ENOMEM;
+	}
+	*str = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
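A hedged usage sketch for the new helper (the surrounding decoder and the 1024-byte cap are illustrative, not from this patch; the declaration is assumed to land in linux/sunrpc/xdr.h alongside the other xdr_stream helpers). On success the caller owns a kmalloc'd, NUL-terminated copy of the XDR string and must kfree() it:

#include <linux/slab.h>
#include <linux/sunrpc/xdr.h>

/* Illustrative decoder: pull a variable-length XDR string off the
 * stream into a freshly allocated, NUL-terminated buffer. */
static int demo_decode_client_id(struct xdr_stream *xdr, char **id)
{
	ssize_t len;

	len = xdr_stream_decode_string_dup(xdr, id, 1024, GFP_KERNEL);
	if (len < 0)
		return (int)len;  /* -EBADMSG, -EMSGSIZE or -ENOMEM */
	/* *id now owns the kmalloc'd string; caller must kfree() it */
	return 0;
}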
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9a6be030ca7d..b530a2852ba8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -897,13 +897,11 @@ static void xprt_timer(struct rpc_task *task)
 		return;
 	dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
 
-	spin_lock_bh(&xprt->transport_lock);
 	if (!req->rq_reply_bytes_recvd) {
 		if (xprt->ops->timer)
 			xprt->ops->timer(xprt, task);
 	} else
 		task->tk_status = 0;
-	spin_unlock_bh(&xprt->transport_lock);
 }
 
 /**
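Dropping transport_lock here is what the shortlog entry "sunrpc: Allow xprt->ops->timer method to sleep" refers to: transport_lock is a bottom-half spinlock, and nothing invoked under it may block. A hypothetical timer method showing what the unlocked call path now permits (illustrative only, not a real transport):

#include <linux/slab.h>
#include <linux/sunrpc/xprt.h>

/* Hypothetical xprt->ops->timer implementation: a GFP_KERNEL
 * allocation may sleep, which was illegal while xprt_timer() held
 * transport_lock as a bh spinlock, and is fine now that the
 * callback runs unlocked. */
static void demo_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
	void *scratch = kmalloc(64, GFP_KERNEL);	/* may sleep */

	kfree(scratch);
}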
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1ebb09e1ac4f..59e64025ed96 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -310,10 +310,7 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	struct rpcrdma_mw *mw;
 
 	while (!list_empty(&req->rl_registered)) {
-		mw = list_first_entry(&req->rl_registered,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
-
+		mw = rpcrdma_pop_mw(&req->rl_registered);
 		if (sync)
 			fmr_op_recover_mr(mw);
 		else
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 47bed5333c7f..f81dd93176c0 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -466,8 +466,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
 	struct rpcrdma_rep *rep = req->rl_reply;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	struct rpcrdma_mw *mw, *tmp;
 	struct rpcrdma_frmr *f;
+	struct rpcrdma_mw *mw;
 	int count, rc;
 
 	dprintk("RPC: %s: req %p\n", __func__, req);
@@ -534,10 +534,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * them to the free MW list.
 	 */
 unmap:
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+	while (!list_empty(&req->rl_registered)) {
+		mw = rpcrdma_pop_mw(&req->rl_registered);
 		dprintk("RPC: %s: DMA unmapping frmr %p\n",
 			__func__, &mw->frmr);
-		list_del_init(&mw->mw_list);
 		ib_dma_unmap_sg(ia->ri_device,
 				mw->mw_sg, mw->mw_nents, mw->mw_dir);
 		rpcrdma_put_mw(r_xprt, mw);
@@ -571,10 +571,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	struct rpcrdma_mw *mw;
 
 	while (!list_empty(&req->rl_registered)) {
-		mw = list_first_entry(&req->rl_registered,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
-
+		mw = rpcrdma_pop_mw(&req->rl_registered);
 		if (sync)
 			frwr_op_recover_mr(mw);
 		else
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c52e0f2ffe52..a044be2d6ad7 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 /* The client can send a request inline as long as the RPCRDMA header
  * plus the RPC call fit under the transport's inline limit. If the
  * combined call message size exceeds that limit, the client must use
- * the read chunk list for this operation.
+ * a Read chunk for this operation.
+ *
+ * A Read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
  */
 static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 				struct rpc_rqst *rqst)
 {
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct xdr_buf *xdr = &rqst->rq_snd_buf;
+	unsigned int count, remaining, offset;
+
+	if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+		return false;
+
+	if (xdr->page_len) {
+		remaining = xdr->page_len;
+		offset = xdr->page_base & ~PAGE_MASK;
+		count = 0;
+		while (remaining) {
+			remaining -= min_t(unsigned int,
+					   PAGE_SIZE - offset, remaining);
+			offset = 0;
+			if (++count > r_xprt->rx_ia.ri_max_send_sges)
+				return false;
+		}
+	}
 
-	return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+	return true;
 }
 
 /* The client can't know how large the actual reply will be. Thus it
@@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
  */
 
 static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
-	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
-	bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+	unsigned int pos, enum rpcrdma_chunktype type,
+	struct rpcrdma_mr_seg *seg)
 {
 	int len, n, p, page_base;
 	struct page **ppages;
@@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	if (len && n == RPCRDMA_MAX_SEGS)
 		goto out_overflow;
 
-	/* When encoding the read list, the tail is always sent inline */
-	if (type == rpcrdma_readch)
+	/* When encoding a Read chunk, the tail iovec contains an
+	 * XDR pad and may be omitted.
+	 */
+	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
-	/* When encoding the Write list, some servers need to see an extra
-	 * segment for odd-length Write chunks. The upper layer provides
-	 * space in the tail iovec for this purpose.
+	/* When encoding a Write chunk, some servers need to see an
+	 * extra segment for non-XDR-aligned Write chunks. The upper
+	 * layer provides space in the tail iovec that may be used
+	 * for this purpose.
 	 */
-	if (type == rpcrdma_writech && reminv_expected)
+	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
 	if (xdrbuf->tail[0].iov_len) {
-		/* the rpcrdma protocol allows us to omit any trailing
-		 * xdr pad bytes, saving the server an RDMA operation. */
-		if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
-			return n;
 		n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
 		if (n == RPCRDMA_MAX_SEGS)
 			goto out_overflow;
@@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+				     rtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -302,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 						 false, &mw);
 		if (n < 0)
 			return ERR_PTR(n);
-		list_add(&mw->mw_list, &req->rl_registered);
+		rpcrdma_push_mw(mw, &req->rl_registered);
 
 		*iptr++ = xdr_one;	/* item present */
 
@@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
-				     rqst->rq_rcv_buf.head[0].iov_len,
-				     wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
+				     rqst->rq_rcv_buf.head[0].iov_len,
+				     wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -371,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 						 true, &mw);
 		if (n < 0)
 			return ERR_PTR(n);
-		list_add(&mw->mw_list, &req->rl_registered);
+		rpcrdma_push_mw(mw, &req->rl_registered);
 
 		iptr = xdr_encode_rdma_segment(iptr, mw);
 
@@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -437,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 						 true, &mw);
 		if (n < 0)
 			return ERR_PTR(n);
-		list_add(&mw->mw_list, &req->rl_registered);
+		rpcrdma_push_mw(mw, &req->rl_registered);
 
 		iptr = xdr_encode_rdma_segment(iptr, mw);
 
@@ -741,13 +759,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	iptr = headerp->rm_body.rm_chunks;
 	iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
 	if (IS_ERR(iptr))
-		goto out_unmap;
+		goto out_err;
 	iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
 	if (IS_ERR(iptr))
-		goto out_unmap;
+		goto out_err;
 	iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
 	if (IS_ERR(iptr))
-		goto out_unmap;
+		goto out_err;
 	hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
 
 	dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
@@ -758,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
 				       &rqst->rq_snd_buf, rtype)) {
 		iptr = ERR_PTR(-EIO);
-		goto out_unmap;
+		goto out_err;
 	}
 	return 0;
 
-out_unmap:
-	r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+out_err:
+	pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+	       PTR_ERR(iptr));
+	r_xprt->rx_stats.failed_marshal_count++;
 	return PTR_ERR(iptr);
 }
 
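The page-walking loop added to rpcrdma_args_inline() above counts how many discrete pages, and therefore how many send SGEs, the xdr_buf's page list will occupy: a payload that starts mid-page can straddle one more page boundary than page_len / PAGE_SIZE suggests. A userspace rendering of the same arithmetic (illustrative constants, not kernel code):

#include <stdio.h>

#define DEMO_PAGE_SIZE 4096UL

static unsigned int min_u(unsigned long a, unsigned int b)
{
	return a < b ? (unsigned int)a : b;
}

/* Same walk as the kernel loop: consume up to a page boundary per
 * iteration, counting one SGE per page touched. */
static unsigned int pages_spanned(unsigned long page_base,
				  unsigned int page_len)
{
	unsigned int remaining = page_len;
	unsigned int offset = page_base & (DEMO_PAGE_SIZE - 1);
	unsigned int count = 0;

	while (remaining) {
		remaining -= min_u(DEMO_PAGE_SIZE - offset, remaining);
		offset = 0;
		count++;
	}
	return count;
}

int main(void)
{
	/* 8192 bytes starting on a page boundary -> 2 pages/SGEs */
	printf("%u\n", pages_spanned(0, 8192));
	/* the same 8192 bytes starting at offset 100 -> 3 pages/SGEs */
	printf("%u\n", pages_spanned(100, 8192));
	return 0;
}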
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 534c178d2a7e..c717f5410776 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-int xprt_rdma_pad_optimize = 1;
+int xprt_rdma_pad_optimize = 0;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -709,10 +709,6 @@ xprt_rdma_send_request(struct rpc_task *task)
 	return 0;
 
 failed_marshal:
-	dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
-		__func__, rc);
-	if (rc == -EIO)
-		r_xprt->rx_stats.failed_marshal_count++;
 	if (rc != -ENOTCONN)
 		return rc;
 drop_connection:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11d07748f699..81cd31acf690 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -54,6 +54,7 @@
 #include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
 #include <linux/module.h> /* try_module_get()/module_put() */
+#include <rdma/ib_cm.h>
 
 #include "xprt_rdma.h"
 
@@ -208,6 +209,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 
 	/* Default settings for RPC-over-RDMA Version One */
 	r_xprt->rx_ia.ri_reminv_expected = false;
+	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 
@@ -215,6 +217,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 	    pmsg->cp_magic == rpcrdma_cmp_magic &&
 	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
 		r_xprt->rx_ia.ri_reminv_expected = true;
+		r_xprt->rx_ia.ri_implicit_roundup = true;
 		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
 		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
 	}
@@ -277,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		connstate = -ENETDOWN;
 		goto connected;
 	case RDMA_CM_EVENT_REJECTED:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+		pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
+			sap, rpc_get_port(sap), ia->ri_device->name,
+			rdma_reject_msg(id, event->status));
+#endif
 		connstate = -ECONNREFUSED;
+		if (event->status == IB_CM_REJ_STALE_CONN)
+			connstate = -EAGAIN;
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
@@ -486,18 +496,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
  */
 int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		  struct rpcrdma_create_data_internal *cdata)
 {
 	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+	unsigned int max_qp_wr, max_sge;
 	struct ib_cq *sendcq, *recvcq;
-	unsigned int max_qp_wr;
 	int rc;
 
-	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
-		dprintk("RPC: %s: insufficient sge's available\n",
-			__func__);
+	max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
 		return -ENOMEM;
 	}
+	ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
 
 	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
 		dprintk("RPC: %s: insufficient wqe's available\n",
@@ -522,7 +533,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
 	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
 	ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
-	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+	ep->rep_attr.cap.max_send_sge = max_sge;
 	ep->rep_attr.cap.max_recv_sge = 1;
 	ep->rep_attr.cap.max_inline_data = 0;
 	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -640,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 int
 rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
+	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+						   rx_ia);
 	struct rdma_cm_id *id, *old;
+	struct sockaddr *sap;
+	unsigned int extras;
 	int rc = 0;
-	int retry_count = 0;
 
 	if (ep->rep_connected != 0) {
-		struct rpcrdma_xprt *xprt;
retry:
 		dprintk("RPC: %s: reconnecting...\n", __func__);
 
 		rpcrdma_ep_disconnect(ep, ia);
 
-		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-		id = rpcrdma_create_id(xprt, ia,
-				(struct sockaddr *)&xprt->rx_data.addr);
+		sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+		id = rpcrdma_create_id(r_xprt, ia, sap);
 		if (IS_ERR(id)) {
 			rc = -EHOSTUNREACH;
 			goto out;
@@ -708,51 +720,18 @@ retry:
 	}
 
 	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
-
-	/*
-	 * Check state. A non-peer reject indicates no listener
-	 * (ECONNREFUSED), which may be a transient state. All
-	 * others indicate a transport condition which has already
-	 * undergone a best-effort.
-	 */
-	if (ep->rep_connected == -ECONNREFUSED &&
-	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
-		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
-		goto retry;
-	}
 	if (ep->rep_connected <= 0) {
-		/* Sometimes, the only way to reliably connect to remote
-		 * CMs is to use same nonzero values for ORD and IRD. */
-		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
-		    (ep->rep_remote_cma.responder_resources == 0 ||
-		     ep->rep_remote_cma.initiator_depth !=
-		     ep->rep_remote_cma.responder_resources)) {
-			if (ep->rep_remote_cma.responder_resources == 0)
-				ep->rep_remote_cma.responder_resources = 1;
-			ep->rep_remote_cma.initiator_depth =
-				ep->rep_remote_cma.responder_resources;
+		if (ep->rep_connected == -EAGAIN)
 			goto retry;
-		}
 		rc = ep->rep_connected;
-	} else {
-		struct rpcrdma_xprt *r_xprt;
-		unsigned int extras;
-
-		dprintk("RPC: %s: connected\n", __func__);
-
-		r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-		extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
-
-		if (extras) {
-			rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
-			if (rc) {
-				pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
-					__func__, rc);
-				rc = 0;
-			}
-		}
+		goto out;
 	}
 
+	dprintk("RPC: %s: connected\n", __func__);
+	extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
+	if (extras)
+		rpcrdma_ep_post_extra_recv(r_xprt, extras);
+
 out:
 	if (rc)
 		ep->rep_connected = rc;
@@ -797,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
 
 	spin_lock(&buf->rb_recovery_lock);
 	while (!list_empty(&buf->rb_stale_mrs)) {
-		mw = list_first_entry(&buf->rb_stale_mrs,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
+		mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
 		spin_unlock(&buf->rb_recovery_lock);
 
 		dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
@@ -817,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
 	spin_lock(&buf->rb_recovery_lock);
-	list_add(&mw->mw_list, &buf->rb_stale_mrs);
+	rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
 	spin_unlock(&buf->rb_recovery_lock);
 
 	schedule_delayed_work(&buf->rb_recovery_worker, 0);
@@ -1093,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 	struct rpcrdma_mw *mw = NULL;
 
 	spin_lock(&buf->rb_mwlock);
-	if (!list_empty(&buf->rb_mws)) {
-		mw = list_first_entry(&buf->rb_mws,
-				      struct rpcrdma_mw, mw_list);
-		list_del_init(&mw->mw_list);
-	}
+	if (!list_empty(&buf->rb_mws))
+		mw = rpcrdma_pop_mw(&buf->rb_mws);
 	spin_unlock(&buf->rb_mwlock);
 
 	if (!mw)
@@ -1120,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
 	spin_lock(&buf->rb_mwlock);
-	list_add_tail(&mw->mw_list, &buf->rb_mws);
+	rpcrdma_push_mw(mw, &buf->rb_mws);
 	spin_unlock(&buf->rb_mwlock);
 }
 
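Two pieces of the verbs.c changes work together: the CM upcall now maps an IB_CM_REJ_STALE_CONN rejection to -EAGAIN (logging the decoded reason via rdma_reject_msg(), hence the new <rdma/ib_cm.h> include), and rpcrdma_ep_connect() retries exactly that case, replacing the old ad-hoc ECONNREFUSED and ORD/IRD retry heuristics. The control flow in miniature (runnable userspace sketch; attempt() is a stand-in for a connect attempt, not kernel code):

#include <errno.h>
#include <stdio.h>

/* Stand-in for a connect attempt: two stale-connection rejections,
 * then success. */
static int attempt(void)
{
	static int tries;

	return ++tries < 3 ? -EAGAIN : 0;
}

int main(void)
{
	int rc;

	/* Only the transient stale-connection case retries; any
	 * other error would be returned to the caller. */
	do {
		rc = attempt();
	} while (rc == -EAGAIN);
	printf("connected: rc=%d\n", rc);
	return 0;
}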
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index e35efd4ac1e4..171a35116de9 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -74,7 +74,9 @@ struct rpcrdma_ia {
 	unsigned int ri_max_frmr_depth;
 	unsigned int ri_max_inline_write;
 	unsigned int ri_max_inline_read;
+	unsigned int ri_max_send_sges;
 	bool ri_reminv_expected;
+	bool ri_implicit_roundup;
 	enum ib_mr_type ri_mrtype;
 	struct ib_qp_attr ri_qp_attr;
 	struct ib_qp_init_attr ri_qp_init_attr;
@@ -303,15 +305,19 @@ struct rpcrdma_mr_seg {		/* chunk descriptors */
 	char *mr_offset;	/* kva if no page, else offset */
 };
 
-/* Reserve enough Send SGEs to send a maximum size inline request:
+/* The Send SGE array is provisioned to send a maximum size
+ * inline request:
  * - RPC-over-RDMA header
  * - xdr_buf head iovec
- * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages
+ * - RPCRDMA_MAX_INLINE bytes, in pages
  * - xdr_buf tail iovec
+ *
+ * The actual number of array elements consumed by each RPC
+ * depends on the device's max_sge limit.
  */
 enum {
-	RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
-	RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
+	RPCRDMA_MIN_SEND_SGES = 3,
+	RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
 	RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
 };
 
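[Editor's note] The revised constants are easy to sanity-check. Below is a minimal standalone sketch of the same arithmetic, assuming 4 KB pages (PAGE_SHIFT = 12) and, purely for illustration, an RPCRDMA_MAX_INLINE of 4096 bytes; the real value depends on the transport configuration.

	/* Editor's sketch: how RPCRDMA_MAX_SEND_SGES falls out of the
	 * comment above. PAGE_SHIFT and RPCRDMA_MAX_INLINE are assumed
	 * values for illustration only.
	 */
	#include <stdio.h>

	#define PAGE_SHIFT         12	/* assumed: 4 KB pages */
	#define RPCRDMA_MAX_INLINE 4096	/* assumed inline threshold */

	enum {
		RPCRDMA_MIN_SEND_SGES = 3,
		RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
		/* header SGE + head iovec SGE + page SGEs + tail iovec SGE */
		RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
	};

	int main(void)
	{
		/* With these assumptions: 1 page SGE, 4 send SGEs total,
		 * which satisfies the RPCRDMA_MIN_SEND_SGES floor of 3.
		 */
		printf("page SGEs: %d, total send SGEs: %d\n",
		       RPCRDMA_MAX_PAGE_SGES, RPCRDMA_MAX_SEND_SGES);
		return 0;
	}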
@@ -348,6 +354,22 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
 	return rqst->rq_xprtdata;
 }
 
+static inline void
+rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+{
+	list_add_tail(&mw->mw_list, list);
+}
+
+static inline struct rpcrdma_mw *
+rpcrdma_pop_mw(struct list_head *list)
+{
+	struct rpcrdma_mw *mw;
+
+	mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
+	list_del(&mw->mw_list);
+	return mw;
+}
+
 /*
  * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
  *  inline requests/replies, and client/server credits.
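[Editor's note] For readers skimming the mw_list refactor: the two inline helpers above centralize the open-coded list_first_entry()/list_del_init() sequences removed earlier in this diff. A self-contained sketch of the same push/pop idiom against the generic kernel list API follows; struct demo_node is a hypothetical stand-in for struct rpcrdma_mw.

	/* Editor's sketch of the pop/push idiom the patch introduces. */
	#include <linux/list.h>

	struct demo_node {
		struct list_head link;
		int id;
	};

	static inline void demo_push(struct demo_node *node, struct list_head *list)
	{
		/* FIFO behavior: add at the tail, pop from the head */
		list_add_tail(&node->link, list);
	}

	static inline struct demo_node *demo_pop(struct list_head *list)
	{
		struct demo_node *node;

		/* Caller must check for an empty list first, as
		 * rpcrdma_get_mw() does with list_empty() above.
		 */
		node = list_first_entry(list, struct demo_node, link);
		list_del(&node->link);
		return node;
	}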
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956c7bce80d1..16aff8ddc16f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
 #include "sunrpc.h"
 
 static void xs_close(struct rpc_xprt *xprt);
+static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
+		struct socket *sock);
 
 /*
  * xprtsock tunables
@@ -666,6 +668,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	if (task->tk_flags & RPC_TASK_SENT)
 		zerocopy = false;
 
+	if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
+		xs_tcp_set_socket_timeouts(xprt, transport->sock);
+
 	/* Continue transmitting the packet/record. We must be careful
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
@@ -1734,7 +1739,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
  */
 static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
 {
+	spin_lock_bh(&xprt->transport_lock);
 	xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
+	spin_unlock_bh(&xprt->transport_lock);
 }
 
 static unsigned short xs_get_random_port(void)
@@ -2235,6 +2242,66 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
 	xs_reset_transport(transport);
 }
 
+static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
+		struct socket *sock)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	unsigned int keepidle;
+	unsigned int keepcnt;
+	unsigned int opt_on = 1;
+	unsigned int timeo;
+
+	spin_lock_bh(&xprt->transport_lock);
+	keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
+	keepcnt = xprt->timeout->to_retries + 1;
+	timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
+		(xprt->timeout->to_retries + 1);
+	clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
+	spin_unlock_bh(&xprt->transport_lock);
+
+	/* TCP Keepalive options */
+	kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+			(char *)&opt_on, sizeof(opt_on));
+	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
+			(char *)&keepidle, sizeof(keepidle));
+	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
+			(char *)&keepidle, sizeof(keepidle));
+	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
+			(char *)&keepcnt, sizeof(keepcnt));
+
+	/* TCP user timeout (see RFC5482) */
+	kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+			(char *)&timeo, sizeof(timeo));
+}
+
+static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
+		unsigned long connect_timeout,
+		unsigned long reconnect_timeout)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	struct rpc_timeout to;
+	unsigned long initval;
+
+	spin_lock_bh(&xprt->transport_lock);
+	if (reconnect_timeout < xprt->max_reconnect_timeout)
+		xprt->max_reconnect_timeout = reconnect_timeout;
+	if (connect_timeout < xprt->connect_timeout) {
+		memcpy(&to, xprt->timeout, sizeof(to));
+		initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
+		/* Arbitrary lower limit */
+		if (initval < XS_TCP_INIT_REEST_TO << 1)
+			initval = XS_TCP_INIT_REEST_TO << 1;
+		to.to_initval = initval;
+		to.to_maxval = initval;
+		memcpy(&transport->tcp_timeout, &to,
+			sizeof(transport->tcp_timeout));
+		xprt->timeout = &transport->tcp_timeout;
+		xprt->connect_timeout = connect_timeout;
+	}
+	set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
+	spin_unlock_bh(&xprt->transport_lock);
+}
+
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2242,22 +2309,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 
 	if (!transport->inet) {
 		struct sock *sk = sock->sk;
-		unsigned int keepidle = xprt->timeout->to_initval / HZ;
-		unsigned int keepcnt = xprt->timeout->to_retries + 1;
-		unsigned int opt_on = 1;
-		unsigned int timeo;
 		unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
-		/* TCP Keepalive options */
-		kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-				(char *)&opt_on, sizeof(opt_on));
-		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
-				(char *)&keepidle, sizeof(keepidle));
-		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-				(char *)&keepidle, sizeof(keepidle));
-		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
-				(char *)&keepcnt, sizeof(keepcnt));
-
 		/* Avoid temporary address, they are bad for long-lived
 		 * connections such as NFS mounts.
 		 * RFC4941, section 3.6 suggests that:
@@ -2268,11 +2321,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
 				(char *)&addr_pref, sizeof(addr_pref));
 
-		/* TCP user timeout (see RFC5482) */
-		timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
-			(xprt->timeout->to_retries + 1);
-		kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
-				(char *)&timeo, sizeof(timeo));
+		xs_tcp_set_socket_timeouts(xprt, sock);
 
 		write_lock_bh(&sk->sk_callback_lock);
 
@@ -2721,6 +2770,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.set_retrans_timeout = xprt_set_retrans_timeout_def,
 	.close = xs_tcp_shutdown,
 	.destroy = xs_destroy,
+	.set_connect_timeout = xs_tcp_set_connect_timeout,
 	.print_stats = xs_tcp_print_stats,
 	.enable_swap = xs_enable_swap,
 	.disable_swap = xs_disable_swap,
@@ -3007,6 +3057,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	xprt->timeout = &xs_tcp_default_timeout;
 
 	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+	xprt->connect_timeout = xprt->timeout->to_initval *
+		(xprt->timeout->to_retries + 1);
 
 	INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
 	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
@@ -3209,7 +3261,9 @@ static int param_set_uint_minmax(const char *val,
 	if (!val)
 		return -EINVAL;
 	ret = kstrtouint(val, 0, &num);
-	if (ret == -EINVAL || num < min || num > max)
+	if (ret)
+		return ret;
+	if (num < min || num > max)
 		return -EINVAL;
 	*((unsigned int *)kp->arg) = num;
 	return 0;
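[Editor's note] A worked example of the keepalive arithmetic introduced in xs_tcp_set_socket_timeouts(), using the xs_tcp_default_timeout values (to_initval of 60 seconds, to_retries of 2). The kernel helpers are approximated so the sketch compiles in userspace; HZ is assumed to be 1000 for illustration.

	/* Editor's sketch of the TCP timeout math in
	 * xs_tcp_set_socket_timeouts(). HZ and the helper
	 * definitions are userspace stand-ins.
	 */
	#include <stdio.h>

	#define HZ 1000	/* assumed tick rate */
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	static unsigned int jiffies_to_msecs(unsigned long j)
	{
		return j * 1000 / HZ;
	}

	int main(void)
	{
		unsigned long to_initval = 60 * HZ;	/* 60 s */
		unsigned long to_retries = 2;

		unsigned int keepidle = DIV_ROUND_UP(to_initval, HZ);
		unsigned int keepcnt = to_retries + 1;
		unsigned int timeo = jiffies_to_msecs(to_initval) *
					(to_retries + 1);

		/* keepidle=60 s (TCP_KEEPIDLE/TCP_KEEPINTVL),
		 * keepcnt=3 probes, timeo=180000 ms (TCP_USER_TIMEOUT):
		 * a dead peer is declared after about 180 seconds,
		 * matching the RPC layer's own major timeout.
		 */
		printf("keepidle=%us keepcnt=%u user_timeout=%ums\n",
		       keepidle, keepcnt, timeo);
		return 0;
	}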