author     Trond Myklebust <trond.myklebust@primarydata.com>   2016-07-24 17:09:02 -0400
committer  Trond Myklebust <trond.myklebust@primarydata.com>   2016-07-24 17:09:02 -0400
commit     1592c4d62a89bbca895c568d65ce290dfbc36ecc (patch)
tree       6b979bc02ded2ea7e644c34e6939ffbbb7ee001d /net
parent     668f455dac57050e33a43ff5fe006f6cd947fc65 (diff)
parent     f0445670bd81cae9f46399d98fef5cd1622d9776 (diff)
Merge branch 'nfs-rdma'
Diffstat (limited to 'net')
-rw-r--r--  net/bridge/br_netfilter_hooks.c        |   2
-rw-r--r--  net/core/flow_dissector.c              |  43
-rw-r--r--  net/core/skbuff.c                      |  18
-rw-r--r--  net/decnet/dn_fib.c                    |  21
-rw-r--r--  net/ipv4/ip_output.c                   |   4
-rw-r--r--  net/ipv6/ip6_fib.c                     |   1
-rw-r--r--  net/packet/af_packet.c                 |   2
-rw-r--r--  net/rds/tcp.c                          |   5
-rw-r--r--  net/sched/act_mirred.c                 |   2
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c         |   2
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c    |   2
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c  |  12
-rw-r--r--  net/sunrpc/svc.c                       |   8
-rw-r--r--  net/sunrpc/xprtrdma/Makefile           |   2
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c          | 378
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c         | 369
-rw-r--r--  net/sunrpc/xprtrdma/physical_ops.c     | 122
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c         | 274
-rw-r--r--  net/sunrpc/xprtrdma/transport.c        |  40
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c            | 242
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h        | 118
-rw-r--r--  net/tipc/netlink_compat.c              |   2
22 files changed, 783 insertions(+), 886 deletions(-)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2d25979273a6..77e7f69bf80d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -700,7 +700,7 @@ static int | |||
700 | br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, | 700 | br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, |
701 | int (*output)(struct net *, struct sock *, struct sk_buff *)) | 701 | int (*output)(struct net *, struct sock *, struct sk_buff *)) |
702 | { | 702 | { |
703 | unsigned int mtu = ip_skb_dst_mtu(skb); | 703 | unsigned int mtu = ip_skb_dst_mtu(sk, skb); |
704 | struct iphdr *iph = ip_hdr(skb); | 704 | struct iphdr *iph = ip_hdr(skb); |
705 | 705 | ||
706 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || | 706 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || |
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a669dea146c6..61ad43f61c5e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, | |||
651 | } | 651 | } |
652 | EXPORT_SYMBOL(make_flow_keys_digest); | 652 | EXPORT_SYMBOL(make_flow_keys_digest); |
653 | 653 | ||
654 | static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; | ||
655 | |||
656 | u32 __skb_get_hash_symmetric(struct sk_buff *skb) | ||
657 | { | ||
658 | struct flow_keys keys; | ||
659 | |||
660 | __flow_hash_secret_init(); | ||
661 | |||
662 | memset(&keys, 0, sizeof(keys)); | ||
663 | __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, | ||
664 | NULL, 0, 0, 0, | ||
665 | FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); | ||
666 | |||
667 | return __flow_hash_from_keys(&keys, hashrnd); | ||
668 | } | ||
669 | EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); | ||
670 | |||
654 | /** | 671 | /** |
655 | * __skb_get_hash: calculate a flow hash | 672 | * __skb_get_hash: calculate a flow hash |
656 | * @skb: sk_buff to calculate flow hash from | 673 | * @skb: sk_buff to calculate flow hash from |
@@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { | |||
868 | }, | 885 | }, |
869 | }; | 886 | }; |
870 | 887 | ||
888 | static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { | ||
889 | { | ||
890 | .key_id = FLOW_DISSECTOR_KEY_CONTROL, | ||
891 | .offset = offsetof(struct flow_keys, control), | ||
892 | }, | ||
893 | { | ||
894 | .key_id = FLOW_DISSECTOR_KEY_BASIC, | ||
895 | .offset = offsetof(struct flow_keys, basic), | ||
896 | }, | ||
897 | { | ||
898 | .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, | ||
899 | .offset = offsetof(struct flow_keys, addrs.v4addrs), | ||
900 | }, | ||
901 | { | ||
902 | .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, | ||
903 | .offset = offsetof(struct flow_keys, addrs.v6addrs), | ||
904 | }, | ||
905 | { | ||
906 | .key_id = FLOW_DISSECTOR_KEY_PORTS, | ||
907 | .offset = offsetof(struct flow_keys, ports), | ||
908 | }, | ||
909 | }; | ||
910 | |||
871 | static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { | 911 | static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { |
872 | { | 912 | { |
873 | .key_id = FLOW_DISSECTOR_KEY_CONTROL, | 913 | .key_id = FLOW_DISSECTOR_KEY_CONTROL, |
@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void) | |||
889 | skb_flow_dissector_init(&flow_keys_dissector, | 929 | skb_flow_dissector_init(&flow_keys_dissector, |
890 | flow_keys_dissector_keys, | 930 | flow_keys_dissector_keys, |
891 | ARRAY_SIZE(flow_keys_dissector_keys)); | 931 | ARRAY_SIZE(flow_keys_dissector_keys)); |
932 | skb_flow_dissector_init(&flow_keys_dissector_symmetric, | ||
933 | flow_keys_dissector_symmetric_keys, | ||
934 | ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); | ||
892 | skb_flow_dissector_init(&flow_keys_buf_dissector, | 935 | skb_flow_dissector_init(&flow_keys_buf_dissector, |
893 | flow_keys_buf_dissector_keys, | 936 | flow_keys_buf_dissector_keys, |
894 | ARRAY_SIZE(flow_keys_buf_dissector_keys)); | 937 | ARRAY_SIZE(flow_keys_buf_dissector_keys)); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f2b77e549c03..eb12d2161fb2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3016,24 +3016,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, | |||
3016 | EXPORT_SYMBOL_GPL(skb_append_pagefrags); | 3016 | EXPORT_SYMBOL_GPL(skb_append_pagefrags); |
3017 | 3017 | ||
3018 | /** | 3018 | /** |
3019 | * skb_push_rcsum - push skb and update receive checksum | ||
3020 | * @skb: buffer to update | ||
3021 | * @len: length of data pulled | ||
3022 | * | ||
3023 | * This function performs an skb_push on the packet and updates | ||
3024 | * the CHECKSUM_COMPLETE checksum. It should be used on | ||
3025 | * receive path processing instead of skb_push unless you know | ||
3026 | * that the checksum difference is zero (e.g., a valid IP header) | ||
3027 | * or you are setting ip_summed to CHECKSUM_NONE. | ||
3028 | */ | ||
3029 | static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) | ||
3030 | { | ||
3031 | skb_push(skb, len); | ||
3032 | skb_postpush_rcsum(skb, skb->data, len); | ||
3033 | return skb->data; | ||
3034 | } | ||
3035 | |||
3036 | /** | ||
3037 | * skb_pull_rcsum - pull skb and update receive checksum | 3019 | * skb_pull_rcsum - pull skb and update receive checksum |
3038 | * @skb: buffer to update | 3020 | * @skb: buffer to update |
3039 | * @len: length of data pulled | 3021 | * @len: length of data pulled |
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index df4803437888..a796fc7cbc35 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -41,6 +41,7 @@ | |||
41 | #include <net/dn_fib.h> | 41 | #include <net/dn_fib.h> |
42 | #include <net/dn_neigh.h> | 42 | #include <net/dn_neigh.h> |
43 | #include <net/dn_dev.h> | 43 | #include <net/dn_dev.h> |
44 | #include <net/nexthop.h> | ||
44 | 45 | ||
45 | #define RT_MIN_TABLE 1 | 46 | #define RT_MIN_TABLE 1 |
46 | 47 | ||
@@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr) | |||
150 | struct rtnexthop *nhp = nla_data(attr); | 151 | struct rtnexthop *nhp = nla_data(attr); |
151 | int nhs = 0, nhlen = nla_len(attr); | 152 | int nhs = 0, nhlen = nla_len(attr); |
152 | 153 | ||
153 | while(nhlen >= (int)sizeof(struct rtnexthop)) { | 154 | while (rtnh_ok(nhp, nhlen)) { |
154 | if ((nhlen -= nhp->rtnh_len) < 0) | ||
155 | return 0; | ||
156 | nhs++; | 155 | nhs++; |
157 | nhp = RTNH_NEXT(nhp); | 156 | nhp = rtnh_next(nhp, &nhlen); |
158 | } | 157 | } |
159 | 158 | ||
160 | return nhs; | 159 | /* leftover implies invalid nexthop configuration, discard it */ |
160 | return nhlen > 0 ? 0 : nhs; | ||
161 | } | 161 | } |
162 | 162 | ||
163 | static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, | 163 | static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, |
@@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, | |||
167 | int nhlen = nla_len(attr); | 167 | int nhlen = nla_len(attr); |
168 | 168 | ||
169 | change_nexthops(fi) { | 169 | change_nexthops(fi) { |
170 | int attrlen = nhlen - sizeof(struct rtnexthop); | 170 | int attrlen; |
171 | if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) | 171 | |
172 | if (!rtnh_ok(nhp, nhlen)) | ||
172 | return -EINVAL; | 173 | return -EINVAL; |
173 | 174 | ||
174 | nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; | 175 | nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; |
175 | nh->nh_oif = nhp->rtnh_ifindex; | 176 | nh->nh_oif = nhp->rtnh_ifindex; |
176 | nh->nh_weight = nhp->rtnh_hops + 1; | 177 | nh->nh_weight = nhp->rtnh_hops + 1; |
177 | 178 | ||
178 | if (attrlen) { | 179 | attrlen = rtnh_attrlen(nhp); |
180 | if (attrlen > 0) { | ||
179 | struct nlattr *gw_attr; | 181 | struct nlattr *gw_attr; |
180 | 182 | ||
181 | gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); | 183 | gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); |
182 | nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; | 184 | nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; |
183 | } | 185 | } |
184 | nhp = RTNH_NEXT(nhp); | 186 | |
187 | nhp = rtnh_next(nhp, &nhlen); | ||
185 | } endfor_nexthops(fi); | 188 | } endfor_nexthops(fi); |
186 | 189 | ||
187 | return 0; | 190 | return 0; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 124bf0a66328..4bd4921639c3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk | |||
271 | return dst_output(net, sk, skb); | 271 | return dst_output(net, sk, skb); |
272 | } | 272 | } |
273 | #endif | 273 | #endif |
274 | mtu = ip_skb_dst_mtu(skb); | 274 | mtu = ip_skb_dst_mtu(sk, skb); |
275 | if (skb_is_gso(skb)) | 275 | if (skb_is_gso(skb)) |
276 | return ip_finish_output_gso(net, sk, skb, mtu); | 276 | return ip_finish_output_gso(net, sk, skb, mtu); |
277 | 277 | ||
@@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, | |||
541 | 541 | ||
542 | iph = ip_hdr(skb); | 542 | iph = ip_hdr(skb); |
543 | 543 | ||
544 | mtu = ip_skb_dst_mtu(skb); | 544 | mtu = ip_skb_dst_mtu(sk, skb); |
545 | if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) | 545 | if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) |
546 | mtu = IPCB(skb)->frag_max_size; | 546 | mtu = IPCB(skb)->frag_max_size; |
547 | 547 | ||
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bcef2369d64..771be1fa4176 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) | |||
177 | } | 177 | } |
178 | } | 178 | } |
179 | 179 | ||
180 | free_percpu(non_pcpu_rt->rt6i_pcpu); | ||
180 | non_pcpu_rt->rt6i_pcpu = NULL; | 181 | non_pcpu_rt->rt6i_pcpu = NULL; |
181 | } | 182 | } |
182 | 183 | ||
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9bff6ef16fa7..9f0983fa4d52 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, | |||
1341 | struct sk_buff *skb, | 1341 | struct sk_buff *skb, |
1342 | unsigned int num) | 1342 | unsigned int num) |
1343 | { | 1343 | { |
1344 | return reciprocal_scale(skb_get_hash(skb), num); | 1344 | return reciprocal_scale(__skb_get_hash_symmetric(skb), num); |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | static unsigned int fanout_demux_lb(struct packet_fanout *f, | 1347 | static unsigned int fanout_demux_lb(struct packet_fanout *f, |
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 74ee126a6fe6..c8a7b4c90190 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -616,7 +616,7 @@ static int rds_tcp_init(void) | |||
616 | 616 | ||
617 | ret = rds_tcp_recv_init(); | 617 | ret = rds_tcp_recv_init(); |
618 | if (ret) | 618 | if (ret) |
619 | goto out_slab; | 619 | goto out_pernet; |
620 | 620 | ||
621 | ret = rds_trans_register(&rds_tcp_transport); | 621 | ret = rds_trans_register(&rds_tcp_transport); |
622 | if (ret) | 622 | if (ret) |
@@ -628,8 +628,9 @@ static int rds_tcp_init(void) | |||
628 | 628 | ||
629 | out_recv: | 629 | out_recv: |
630 | rds_tcp_recv_exit(); | 630 | rds_tcp_recv_exit(); |
631 | out_slab: | 631 | out_pernet: |
632 | unregister_pernet_subsys(&rds_tcp_net_ops); | 632 | unregister_pernet_subsys(&rds_tcp_net_ops); |
633 | out_slab: | ||
633 | kmem_cache_destroy(rds_tcp_conn_slab); | 634 | kmem_cache_destroy(rds_tcp_conn_slab); |
634 | out: | 635 | out: |
635 | return ret; | 636 | return ret; |
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 128942bc9e42..1f5bd6ccbd2c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, | |||
181 | 181 | ||
182 | if (!(at & AT_EGRESS)) { | 182 | if (!(at & AT_EGRESS)) { |
183 | if (m->tcfm_ok_push) | 183 | if (m->tcfm_ok_push) |
184 | skb_push(skb2, skb->mac_len); | 184 | skb_push_rcsum(skb2, skb->mac_len); |
185 | } | 185 | } |
186 | 186 | ||
187 | /* mirror is always swallowed */ | 187 | /* mirror is always swallowed */ |
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 813a3cdfb573..23c8e7c39656 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1018,6 +1018,8 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) | |||
1018 | auth->au_flags = 0; | 1018 | auth->au_flags = 0; |
1019 | auth->au_ops = &authgss_ops; | 1019 | auth->au_ops = &authgss_ops; |
1020 | auth->au_flavor = flavor; | 1020 | auth->au_flavor = flavor; |
1021 | if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor)) | ||
1022 | auth->au_flags |= RPCAUTH_AUTH_DATATOUCH; | ||
1021 | atomic_set(&auth->au_count, 1); | 1023 | atomic_set(&auth->au_count, 1); |
1022 | kref_init(&gss_auth->kref); | 1024 | kref_init(&gss_auth->kref); |
1023 | 1025 | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 65427492b1c9..60595835317a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -745,12 +745,14 @@ static struct pf_desc gss_kerberos_pfs[] = { | |||
745 | .qop = GSS_C_QOP_DEFAULT, | 745 | .qop = GSS_C_QOP_DEFAULT, |
746 | .service = RPC_GSS_SVC_INTEGRITY, | 746 | .service = RPC_GSS_SVC_INTEGRITY, |
747 | .name = "krb5i", | 747 | .name = "krb5i", |
748 | .datatouch = true, | ||
748 | }, | 749 | }, |
749 | [2] = { | 750 | [2] = { |
750 | .pseudoflavor = RPC_AUTH_GSS_KRB5P, | 751 | .pseudoflavor = RPC_AUTH_GSS_KRB5P, |
751 | .qop = GSS_C_QOP_DEFAULT, | 752 | .qop = GSS_C_QOP_DEFAULT, |
752 | .service = RPC_GSS_SVC_PRIVACY, | 753 | .service = RPC_GSS_SVC_PRIVACY, |
753 | .name = "krb5p", | 754 | .name = "krb5p", |
755 | .datatouch = true, | ||
754 | }, | 756 | }, |
755 | }; | 757 | }; |
756 | 758 | ||
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 7063d856a598..5fec3abbe19b 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -361,6 +361,18 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) | |||
361 | } | 361 | } |
362 | EXPORT_SYMBOL(gss_pseudoflavor_to_service); | 362 | EXPORT_SYMBOL(gss_pseudoflavor_to_service); |
363 | 363 | ||
364 | bool | ||
365 | gss_pseudoflavor_to_datatouch(struct gss_api_mech *gm, u32 pseudoflavor) | ||
366 | { | ||
367 | int i; | ||
368 | |||
369 | for (i = 0; i < gm->gm_pf_num; i++) { | ||
370 | if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) | ||
371 | return gm->gm_pfs[i].datatouch; | ||
372 | } | ||
373 | return false; | ||
374 | } | ||
375 | |||
364 | char * | 376 | char * |
365 | gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) | 377 | gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) |
366 | { | 378 | { |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index cc9852897395..c5b0cb4f4056 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1188,11 +1188,17 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) | |||
1188 | *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 1188 | *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
1189 | 1189 | ||
1190 | /* Encode reply */ | 1190 | /* Encode reply */ |
1191 | if (test_bit(RQ_DROPME, &rqstp->rq_flags)) { | 1191 | if (*statp == rpc_drop_reply || |
1192 | test_bit(RQ_DROPME, &rqstp->rq_flags)) { | ||
1192 | if (procp->pc_release) | 1193 | if (procp->pc_release) |
1193 | procp->pc_release(rqstp, NULL, rqstp->rq_resp); | 1194 | procp->pc_release(rqstp, NULL, rqstp->rq_resp); |
1194 | goto dropit; | 1195 | goto dropit; |
1195 | } | 1196 | } |
1197 | if (*statp == rpc_autherr_badcred) { | ||
1198 | if (procp->pc_release) | ||
1199 | procp->pc_release(rqstp, NULL, rqstp->rq_resp); | ||
1200 | goto err_bad_auth; | ||
1201 | } | ||
1196 | if (*statp == rpc_success && | 1202 | if (*statp == rpc_success && |
1197 | (xdr = procp->pc_encode) && | 1203 | (xdr = procp->pc_encode) && |
1198 | !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { | 1204 | !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index dc9f3b513a05..ef19fa42c50f 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,7 +1,7 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o |
2 | 2 | ||
3 | rpcrdma-y := transport.o rpc_rdma.o verbs.o \ | 3 | rpcrdma-y := transport.o rpc_rdma.o verbs.o \ |
4 | fmr_ops.o frwr_ops.o physical_ops.o \ | 4 | fmr_ops.o frwr_ops.o \ |
5 | svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ | 5 | svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ |
6 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ | 6 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ |
7 | module.o | 7 | module.o |
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 6326ebe8b595..21cb3b150b37 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -19,13 +19,6 @@ | |||
19 | * verb (fmr_op_unmap). | 19 | * verb (fmr_op_unmap). |
20 | */ | 20 | */ |
21 | 21 | ||
22 | /* Transport recovery | ||
23 | * | ||
24 | * After a transport reconnect, fmr_op_map re-uses the MR already | ||
25 | * allocated for the RPC, but generates a fresh rkey then maps the | ||
26 | * MR again. This process is synchronous. | ||
27 | */ | ||
28 | |||
29 | #include "xprt_rdma.h" | 22 | #include "xprt_rdma.h" |
30 | 23 | ||
31 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 24 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
@@ -35,62 +28,132 @@ | |||
35 | /* Maximum scatter/gather per FMR */ | 28 | /* Maximum scatter/gather per FMR */ |
36 | #define RPCRDMA_MAX_FMR_SGES (64) | 29 | #define RPCRDMA_MAX_FMR_SGES (64) |
37 | 30 | ||
38 | static struct workqueue_struct *fmr_recovery_wq; | 31 | /* Access mode of externally registered pages */ |
39 | 32 | enum { | |
40 | #define FMR_RECOVERY_WQ_FLAGS (WQ_UNBOUND) | 33 | RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE | |
34 | IB_ACCESS_REMOTE_READ, | ||
35 | }; | ||
41 | 36 | ||
42 | int | 37 | bool |
43 | fmr_alloc_recovery_wq(void) | 38 | fmr_is_supported(struct rpcrdma_ia *ia) |
44 | { | 39 | { |
45 | fmr_recovery_wq = alloc_workqueue("fmr_recovery", WQ_UNBOUND, 0); | 40 | if (!ia->ri_device->alloc_fmr) { |
46 | return !fmr_recovery_wq ? -ENOMEM : 0; | 41 | pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n", |
42 | ia->ri_device->name); | ||
43 | return false; | ||
44 | } | ||
45 | return true; | ||
47 | } | 46 | } |
48 | 47 | ||
49 | void | 48 | static int |
50 | fmr_destroy_recovery_wq(void) | 49 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) |
51 | { | 50 | { |
52 | struct workqueue_struct *wq; | 51 | static struct ib_fmr_attr fmr_attr = { |
52 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
53 | .max_maps = 1, | ||
54 | .page_shift = PAGE_SHIFT | ||
55 | }; | ||
53 | 56 | ||
54 | if (!fmr_recovery_wq) | 57 | mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, |
55 | return; | 58 | sizeof(u64), GFP_KERNEL); |
59 | if (!mw->fmr.fm_physaddrs) | ||
60 | goto out_free; | ||
56 | 61 | ||
57 | wq = fmr_recovery_wq; | 62 | mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, |
58 | fmr_recovery_wq = NULL; | 63 | sizeof(*mw->mw_sg), GFP_KERNEL); |
59 | destroy_workqueue(wq); | 64 | if (!mw->mw_sg) |
65 | goto out_free; | ||
66 | |||
67 | sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); | ||
68 | |||
69 | mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, | ||
70 | &fmr_attr); | ||
71 | if (IS_ERR(mw->fmr.fm_mr)) | ||
72 | goto out_fmr_err; | ||
73 | |||
74 | return 0; | ||
75 | |||
76 | out_fmr_err: | ||
77 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, | ||
78 | PTR_ERR(mw->fmr.fm_mr)); | ||
79 | |||
80 | out_free: | ||
81 | kfree(mw->mw_sg); | ||
82 | kfree(mw->fmr.fm_physaddrs); | ||
83 | return -ENOMEM; | ||
60 | } | 84 | } |
61 | 85 | ||
62 | static int | 86 | static int |
63 | __fmr_unmap(struct rpcrdma_mw *mw) | 87 | __fmr_unmap(struct rpcrdma_mw *mw) |
64 | { | 88 | { |
65 | LIST_HEAD(l); | 89 | LIST_HEAD(l); |
90 | int rc; | ||
66 | 91 | ||
67 | list_add(&mw->fmr.fmr->list, &l); | 92 | list_add(&mw->fmr.fm_mr->list, &l); |
68 | return ib_unmap_fmr(&l); | 93 | rc = ib_unmap_fmr(&l); |
94 | list_del_init(&mw->fmr.fm_mr->list); | ||
95 | return rc; | ||
69 | } | 96 | } |
70 | 97 | ||
71 | /* Deferred reset of a single FMR. Generate a fresh rkey by | ||
72 | * replacing the MR. There's no recovery if this fails. | ||
73 | */ | ||
74 | static void | 98 | static void |
75 | __fmr_recovery_worker(struct work_struct *work) | 99 | fmr_op_release_mr(struct rpcrdma_mw *r) |
76 | { | 100 | { |
77 | struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw, | 101 | LIST_HEAD(unmap_list); |
78 | mw_work); | 102 | int rc; |
79 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | ||
80 | 103 | ||
81 | __fmr_unmap(mw); | 104 | /* Ensure MW is not on any rl_registered list */ |
82 | rpcrdma_put_mw(r_xprt, mw); | 105 | if (!list_empty(&r->mw_list)) |
83 | return; | 106 | list_del(&r->mw_list); |
107 | |||
108 | kfree(r->fmr.fm_physaddrs); | ||
109 | kfree(r->mw_sg); | ||
110 | |||
111 | /* In case this one was left mapped, try to unmap it | ||
112 | * to prevent dealloc_fmr from failing with EBUSY | ||
113 | */ | ||
114 | rc = __fmr_unmap(r); | ||
115 | if (rc) | ||
116 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", | ||
117 | r, rc); | ||
118 | |||
119 | rc = ib_dealloc_fmr(r->fmr.fm_mr); | ||
120 | if (rc) | ||
121 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", | ||
122 | r, rc); | ||
123 | |||
124 | kfree(r); | ||
84 | } | 125 | } |
85 | 126 | ||
86 | /* A broken MR was discovered in a context that can't sleep. | 127 | /* Reset of a single FMR. |
87 | * Defer recovery to the recovery worker. | ||
88 | */ | 128 | */ |
89 | static void | 129 | static void |
90 | __fmr_queue_recovery(struct rpcrdma_mw *mw) | 130 | fmr_op_recover_mr(struct rpcrdma_mw *mw) |
91 | { | 131 | { |
92 | INIT_WORK(&mw->mw_work, __fmr_recovery_worker); | 132 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; |
93 | queue_work(fmr_recovery_wq, &mw->mw_work); | 133 | int rc; |
134 | |||
135 | /* ORDER: invalidate first */ | ||
136 | rc = __fmr_unmap(mw); | ||
137 | |||
138 | /* ORDER: then DMA unmap */ | ||
139 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
140 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
141 | if (rc) | ||
142 | goto out_release; | ||
143 | |||
144 | rpcrdma_put_mw(r_xprt, mw); | ||
145 | r_xprt->rx_stats.mrs_recovered++; | ||
146 | return; | ||
147 | |||
148 | out_release: | ||
149 | pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); | ||
150 | r_xprt->rx_stats.mrs_orphaned++; | ||
151 | |||
152 | spin_lock(&r_xprt->rx_buf.rb_mwlock); | ||
153 | list_del(&mw->mw_all); | ||
154 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); | ||
155 | |||
156 | fmr_op_release_mr(mw); | ||
94 | } | 157 | } |
95 | 158 | ||
96 | static int | 159 | static int |
@@ -112,86 +175,21 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
112 | RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES); | 175 | RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES); |
113 | } | 176 | } |
114 | 177 | ||
115 | static int | ||
116 | fmr_op_init(struct rpcrdma_xprt *r_xprt) | ||
117 | { | ||
118 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
119 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
120 | struct ib_fmr_attr fmr_attr = { | ||
121 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
122 | .max_maps = 1, | ||
123 | .page_shift = PAGE_SHIFT | ||
124 | }; | ||
125 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
126 | struct rpcrdma_mw *r; | ||
127 | int i, rc; | ||
128 | |||
129 | spin_lock_init(&buf->rb_mwlock); | ||
130 | INIT_LIST_HEAD(&buf->rb_mws); | ||
131 | INIT_LIST_HEAD(&buf->rb_all); | ||
132 | |||
133 | i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1); | ||
134 | i += 2; /* head + tail */ | ||
135 | i *= buf->rb_max_requests; /* one set for each RPC slot */ | ||
136 | dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); | ||
137 | |||
138 | rc = -ENOMEM; | ||
139 | while (i--) { | ||
140 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
141 | if (!r) | ||
142 | goto out; | ||
143 | |||
144 | r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * | ||
145 | sizeof(u64), GFP_KERNEL); | ||
146 | if (!r->fmr.physaddrs) | ||
147 | goto out_free; | ||
148 | |||
149 | r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | ||
150 | if (IS_ERR(r->fmr.fmr)) | ||
151 | goto out_fmr_err; | ||
152 | |||
153 | r->mw_xprt = r_xprt; | ||
154 | list_add(&r->mw_list, &buf->rb_mws); | ||
155 | list_add(&r->mw_all, &buf->rb_all); | ||
156 | } | ||
157 | return 0; | ||
158 | |||
159 | out_fmr_err: | ||
160 | rc = PTR_ERR(r->fmr.fmr); | ||
161 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); | ||
162 | kfree(r->fmr.physaddrs); | ||
163 | out_free: | ||
164 | kfree(r); | ||
165 | out: | ||
166 | return rc; | ||
167 | } | ||
168 | |||
169 | /* Use the ib_map_phys_fmr() verb to register a memory region | 178 | /* Use the ib_map_phys_fmr() verb to register a memory region |
170 | * for remote access via RDMA READ or RDMA WRITE. | 179 | * for remote access via RDMA READ or RDMA WRITE. |
171 | */ | 180 | */ |
172 | static int | 181 | static int |
173 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 182 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
174 | int nsegs, bool writing) | 183 | int nsegs, bool writing, struct rpcrdma_mw **out) |
175 | { | 184 | { |
176 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
177 | struct ib_device *device = ia->ri_device; | ||
178 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
179 | struct rpcrdma_mr_seg *seg1 = seg; | 185 | struct rpcrdma_mr_seg *seg1 = seg; |
180 | int len, pageoff, i, rc; | 186 | int len, pageoff, i, rc; |
181 | struct rpcrdma_mw *mw; | 187 | struct rpcrdma_mw *mw; |
188 | u64 *dma_pages; | ||
182 | 189 | ||
183 | mw = seg1->rl_mw; | 190 | mw = rpcrdma_get_mw(r_xprt); |
184 | seg1->rl_mw = NULL; | 191 | if (!mw) |
185 | if (!mw) { | 192 | return -ENOBUFS; |
186 | mw = rpcrdma_get_mw(r_xprt); | ||
187 | if (!mw) | ||
188 | return -ENOMEM; | ||
189 | } else { | ||
190 | /* this is a retransmit; generate a fresh rkey */ | ||
191 | rc = __fmr_unmap(mw); | ||
192 | if (rc) | ||
193 | return rc; | ||
194 | } | ||
195 | 193 | ||
196 | pageoff = offset_in_page(seg1->mr_offset); | 194 | pageoff = offset_in_page(seg1->mr_offset); |
197 | seg1->mr_offset -= pageoff; /* start of page */ | 195 | seg1->mr_offset -= pageoff; /* start of page */ |
@@ -200,8 +198,14 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
200 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | 198 | if (nsegs > RPCRDMA_MAX_FMR_SGES) |
201 | nsegs = RPCRDMA_MAX_FMR_SGES; | 199 | nsegs = RPCRDMA_MAX_FMR_SGES; |
202 | for (i = 0; i < nsegs;) { | 200 | for (i = 0; i < nsegs;) { |
203 | rpcrdma_map_one(device, seg, direction); | 201 | if (seg->mr_page) |
204 | mw->fmr.physaddrs[i] = seg->mr_dma; | 202 | sg_set_page(&mw->mw_sg[i], |
203 | seg->mr_page, | ||
204 | seg->mr_len, | ||
205 | offset_in_page(seg->mr_offset)); | ||
206 | else | ||
207 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, | ||
208 | seg->mr_len); | ||
205 | len += seg->mr_len; | 209 | len += seg->mr_len; |
206 | ++seg; | 210 | ++seg; |
207 | ++i; | 211 | ++i; |
@@ -210,49 +214,54 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
210 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 214 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
211 | break; | 215 | break; |
212 | } | 216 | } |
213 | 217 | mw->mw_nents = i; | |
214 | rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs, | 218 | mw->mw_dir = rpcrdma_data_dir(writing); |
215 | i, seg1->mr_dma); | 219 | if (i == 0) |
220 | goto out_dmamap_err; | ||
221 | |||
222 | if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device, | ||
223 | mw->mw_sg, mw->mw_nents, mw->mw_dir)) | ||
224 | goto out_dmamap_err; | ||
225 | |||
226 | for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) | ||
227 | dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); | ||
228 | rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, | ||
229 | dma_pages[0]); | ||
216 | if (rc) | 230 | if (rc) |
217 | goto out_maperr; | 231 | goto out_maperr; |
218 | 232 | ||
219 | seg1->rl_mw = mw; | 233 | mw->mw_handle = mw->fmr.fm_mr->rkey; |
220 | seg1->mr_rkey = mw->fmr.fmr->rkey; | 234 | mw->mw_length = len; |
221 | seg1->mr_base = seg1->mr_dma + pageoff; | 235 | mw->mw_offset = dma_pages[0] + pageoff; |
222 | seg1->mr_nsegs = i; | ||
223 | seg1->mr_len = len; | ||
224 | return i; | ||
225 | 236 | ||
226 | out_maperr: | 237 | *out = mw; |
227 | dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | 238 | return mw->mw_nents; |
228 | __func__, len, (unsigned long long)seg1->mr_dma, | ||
229 | pageoff, i, rc); | ||
230 | while (i--) | ||
231 | rpcrdma_unmap_one(device, --seg); | ||
232 | return rc; | ||
233 | } | ||
234 | 239 | ||
235 | static void | 240 | out_dmamap_err: |
236 | __fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | 241 | pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n", |
237 | { | 242 | mw->mw_sg, mw->mw_nents); |
238 | struct ib_device *device = r_xprt->rx_ia.ri_device; | 243 | rpcrdma_defer_mr_recovery(mw); |
239 | int nsegs = seg->mr_nsegs; | 244 | return -EIO; |
240 | 245 | ||
241 | while (nsegs--) | 246 | out_maperr: |
242 | rpcrdma_unmap_one(device, seg++); | 247 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", |
248 | len, (unsigned long long)dma_pages[0], | ||
249 | pageoff, mw->mw_nents, rc); | ||
250 | rpcrdma_defer_mr_recovery(mw); | ||
251 | return -EIO; | ||
243 | } | 252 | } |
244 | 253 | ||
245 | /* Invalidate all memory regions that were registered for "req". | 254 | /* Invalidate all memory regions that were registered for "req". |
246 | * | 255 | * |
247 | * Sleeps until it is safe for the host CPU to access the | 256 | * Sleeps until it is safe for the host CPU to access the |
248 | * previously mapped memory regions. | 257 | * previously mapped memory regions. |
258 | * | ||
259 | * Caller ensures that req->rl_registered is not empty. | ||
249 | */ | 260 | */ |
250 | static void | 261 | static void |
251 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | 262 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) |
252 | { | 263 | { |
253 | struct rpcrdma_mr_seg *seg; | 264 | struct rpcrdma_mw *mw, *tmp; |
254 | unsigned int i, nchunks; | ||
255 | struct rpcrdma_mw *mw; | ||
256 | LIST_HEAD(unmap_list); | 265 | LIST_HEAD(unmap_list); |
257 | int rc; | 266 | int rc; |
258 | 267 | ||
@@ -261,90 +270,54 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
261 | /* ORDER: Invalidate all of the req's MRs first | 270 | /* ORDER: Invalidate all of the req's MRs first |
262 | * | 271 | * |
263 | * ib_unmap_fmr() is slow, so use a single call instead | 272 | * ib_unmap_fmr() is slow, so use a single call instead |
264 | * of one call per mapped MR. | 273 | * of one call per mapped FMR. |
265 | */ | 274 | */ |
266 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | 275 | list_for_each_entry(mw, &req->rl_registered, mw_list) |
267 | seg = &req->rl_segments[i]; | 276 | list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); |
268 | mw = seg->rl_mw; | ||
269 | |||
270 | list_add(&mw->fmr.fmr->list, &unmap_list); | ||
271 | |||
272 | i += seg->mr_nsegs; | ||
273 | } | ||
274 | rc = ib_unmap_fmr(&unmap_list); | 277 | rc = ib_unmap_fmr(&unmap_list); |
275 | if (rc) | 278 | if (rc) |
276 | pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc); | 279 | goto out_reset; |
277 | 280 | ||
278 | /* ORDER: Now DMA unmap all of the req's MRs, and return | 281 | /* ORDER: Now DMA unmap all of the req's MRs, and return |
279 | * them to the free MW list. | 282 | * them to the free MW list. |
280 | */ | 283 | */ |
281 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | 284 | list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { |
282 | seg = &req->rl_segments[i]; | 285 | list_del_init(&mw->mw_list); |
286 | list_del_init(&mw->fmr.fm_mr->list); | ||
287 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
288 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
289 | rpcrdma_put_mw(r_xprt, mw); | ||
290 | } | ||
283 | 291 | ||
284 | __fmr_dma_unmap(r_xprt, seg); | 292 | return; |
285 | rpcrdma_put_mw(r_xprt, seg->rl_mw); | ||
286 | 293 | ||
287 | i += seg->mr_nsegs; | 294 | out_reset: |
288 | seg->mr_nsegs = 0; | 295 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); |
289 | seg->rl_mw = NULL; | ||
290 | } | ||
291 | 296 | ||
292 | req->rl_nchunks = 0; | 297 | list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { |
298 | list_del_init(&mw->fmr.fm_mr->list); | ||
299 | fmr_op_recover_mr(mw); | ||
300 | } | ||
293 | } | 301 | } |
294 | 302 | ||
295 | /* Use a slow, safe mechanism to invalidate all memory regions | 303 | /* Use a slow, safe mechanism to invalidate all memory regions |
296 | * that were registered for "req". | 304 | * that were registered for "req". |
297 | * | ||
298 | * In the asynchronous case, DMA unmapping occurs first here | ||
299 | * because the rpcrdma_mr_seg is released immediately after this | ||
300 | * call. It's contents won't be available in __fmr_dma_unmap later. | ||
301 | * FIXME. | ||
302 | */ | 305 | */ |
303 | static void | 306 | static void |
304 | fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | 307 | fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
305 | bool sync) | 308 | bool sync) |
306 | { | 309 | { |
307 | struct rpcrdma_mr_seg *seg; | ||
308 | struct rpcrdma_mw *mw; | 310 | struct rpcrdma_mw *mw; |
309 | unsigned int i; | ||
310 | |||
311 | for (i = 0; req->rl_nchunks; req->rl_nchunks--) { | ||
312 | seg = &req->rl_segments[i]; | ||
313 | mw = seg->rl_mw; | ||
314 | |||
315 | if (sync) { | ||
316 | /* ORDER */ | ||
317 | __fmr_unmap(mw); | ||
318 | __fmr_dma_unmap(r_xprt, seg); | ||
319 | rpcrdma_put_mw(r_xprt, mw); | ||
320 | } else { | ||
321 | __fmr_dma_unmap(r_xprt, seg); | ||
322 | __fmr_queue_recovery(mw); | ||
323 | } | ||
324 | |||
325 | i += seg->mr_nsegs; | ||
326 | seg->mr_nsegs = 0; | ||
327 | seg->rl_mw = NULL; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void | ||
332 | fmr_op_destroy(struct rpcrdma_buffer *buf) | ||
333 | { | ||
334 | struct rpcrdma_mw *r; | ||
335 | int rc; | ||
336 | |||
337 | while (!list_empty(&buf->rb_all)) { | ||
338 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
339 | list_del(&r->mw_all); | ||
340 | kfree(r->fmr.physaddrs); | ||
341 | 311 | ||
342 | rc = ib_dealloc_fmr(r->fmr.fmr); | 312 | while (!list_empty(&req->rl_registered)) { |
343 | if (rc) | 313 | mw = list_first_entry(&req->rl_registered, |
344 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | 314 | struct rpcrdma_mw, mw_list); |
345 | __func__, rc); | 315 | list_del_init(&mw->mw_list); |
346 | 316 | ||
347 | kfree(r); | 317 | if (sync) |
318 | fmr_op_recover_mr(mw); | ||
319 | else | ||
320 | rpcrdma_defer_mr_recovery(mw); | ||
348 | } | 321 | } |
349 | } | 322 | } |
350 | 323 | ||
@@ -352,9 +325,10 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | |||
352 | .ro_map = fmr_op_map, | 325 | .ro_map = fmr_op_map, |
353 | .ro_unmap_sync = fmr_op_unmap_sync, | 326 | .ro_unmap_sync = fmr_op_unmap_sync, |
354 | .ro_unmap_safe = fmr_op_unmap_safe, | 327 | .ro_unmap_safe = fmr_op_unmap_safe, |
328 | .ro_recover_mr = fmr_op_recover_mr, | ||
355 | .ro_open = fmr_op_open, | 329 | .ro_open = fmr_op_open, |
356 | .ro_maxpages = fmr_op_maxpages, | 330 | .ro_maxpages = fmr_op_maxpages, |
357 | .ro_init = fmr_op_init, | 331 | .ro_init_mr = fmr_op_init_mr, |
358 | .ro_destroy = fmr_op_destroy, | 332 | .ro_release_mr = fmr_op_release_mr, |
359 | .ro_displayname = "fmr", | 333 | .ro_displayname = "fmr", |
360 | }; | 334 | }; |
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c0947544babe..892b5e1d9b09 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -73,29 +73,71 @@ | |||
73 | # define RPCDBG_FACILITY RPCDBG_TRANS | 73 | # define RPCDBG_FACILITY RPCDBG_TRANS |
74 | #endif | 74 | #endif |
75 | 75 | ||
76 | static struct workqueue_struct *frwr_recovery_wq; | 76 | bool |
77 | 77 | frwr_is_supported(struct rpcrdma_ia *ia) | |
78 | #define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM) | 78 | { |
79 | struct ib_device_attr *attrs = &ia->ri_device->attrs; | ||
80 | |||
81 | if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) | ||
82 | goto out_not_supported; | ||
83 | if (attrs->max_fast_reg_page_list_len == 0) | ||
84 | goto out_not_supported; | ||
85 | return true; | ||
86 | |||
87 | out_not_supported: | ||
88 | pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n", | ||
89 | ia->ri_device->name); | ||
90 | return false; | ||
91 | } | ||
79 | 92 | ||
80 | int | 93 | static int |
81 | frwr_alloc_recovery_wq(void) | 94 | frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) |
82 | { | 95 | { |
83 | frwr_recovery_wq = alloc_workqueue("frwr_recovery", | 96 | unsigned int depth = ia->ri_max_frmr_depth; |
84 | FRWR_RECOVERY_WQ_FLAGS, 0); | 97 | struct rpcrdma_frmr *f = &r->frmr; |
85 | return !frwr_recovery_wq ? -ENOMEM : 0; | 98 | int rc; |
99 | |||
100 | f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth); | ||
101 | if (IS_ERR(f->fr_mr)) | ||
102 | goto out_mr_err; | ||
103 | |||
104 | r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); | ||
105 | if (!r->mw_sg) | ||
106 | goto out_list_err; | ||
107 | |||
108 | sg_init_table(r->mw_sg, depth); | ||
109 | init_completion(&f->fr_linv_done); | ||
110 | return 0; | ||
111 | |||
112 | out_mr_err: | ||
113 | rc = PTR_ERR(f->fr_mr); | ||
114 | dprintk("RPC: %s: ib_alloc_mr status %i\n", | ||
115 | __func__, rc); | ||
116 | return rc; | ||
117 | |||
118 | out_list_err: | ||
119 | rc = -ENOMEM; | ||
120 | dprintk("RPC: %s: sg allocation failure\n", | ||
121 | __func__); | ||
122 | ib_dereg_mr(f->fr_mr); | ||
123 | return rc; | ||
86 | } | 124 | } |
87 | 125 | ||
88 | void | 126 | static void |
89 | frwr_destroy_recovery_wq(void) | 127 | frwr_op_release_mr(struct rpcrdma_mw *r) |
90 | { | 128 | { |
91 | struct workqueue_struct *wq; | 129 | int rc; |
92 | 130 | ||
93 | if (!frwr_recovery_wq) | 131 | /* Ensure MW is not on any rl_registered list */ |
94 | return; | 132 | if (!list_empty(&r->mw_list)) |
133 | list_del(&r->mw_list); | ||
95 | 134 | ||
96 | wq = frwr_recovery_wq; | 135 | rc = ib_dereg_mr(r->frmr.fr_mr); |
97 | frwr_recovery_wq = NULL; | 136 | if (rc) |
98 | destroy_workqueue(wq); | 137 | pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", |
138 | r, rc); | ||
139 | kfree(r->mw_sg); | ||
140 | kfree(r); | ||
99 | } | 141 | } |
100 | 142 | ||
101 | static int | 143 | static int |
@@ -124,93 +166,37 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) | |||
124 | return 0; | 166 | return 0; |
125 | } | 167 | } |
126 | 168 | ||
127 | static void | 169 | /* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. |
128 | __frwr_reset_and_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) | ||
129 | { | ||
130 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
131 | struct rpcrdma_frmr *f = &mw->frmr; | ||
132 | int rc; | ||
133 | |||
134 | rc = __frwr_reset_mr(ia, mw); | ||
135 | ib_dma_unmap_sg(ia->ri_device, f->fr_sg, f->fr_nents, f->fr_dir); | ||
136 | if (rc) | ||
137 | return; | ||
138 | |||
139 | rpcrdma_put_mw(r_xprt, mw); | ||
140 | } | ||
141 | |||
142 | /* Deferred reset of a single FRMR. Generate a fresh rkey by | ||
143 | * replacing the MR. | ||
144 | * | 170 | * |
145 | * There's no recovery if this fails. The FRMR is abandoned, but | 171 | * There's no recovery if this fails. The FRMR is abandoned, but |
146 | * remains in rb_all. It will be cleaned up when the transport is | 172 | * remains in rb_all. It will be cleaned up when the transport is |
147 | * destroyed. | 173 | * destroyed. |
148 | */ | 174 | */ |
149 | static void | 175 | static void |
150 | __frwr_recovery_worker(struct work_struct *work) | 176 | frwr_op_recover_mr(struct rpcrdma_mw *mw) |
151 | { | ||
152 | struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, | ||
153 | mw_work); | ||
154 | |||
155 | __frwr_reset_and_unmap(r->mw_xprt, r); | ||
156 | return; | ||
157 | } | ||
158 | |||
159 | /* A broken MR was discovered in a context that can't sleep. | ||
160 | * Defer recovery to the recovery worker. | ||
161 | */ | ||
162 | static void | ||
163 | __frwr_queue_recovery(struct rpcrdma_mw *r) | ||
164 | { | ||
165 | INIT_WORK(&r->mw_work, __frwr_recovery_worker); | ||
166 | queue_work(frwr_recovery_wq, &r->mw_work); | ||
167 | } | ||
168 | |||
169 | static int | ||
170 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | ||
171 | unsigned int depth) | ||
172 | { | 177 | { |
173 | struct rpcrdma_frmr *f = &r->frmr; | 178 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; |
179 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
174 | int rc; | 180 | int rc; |
175 | 181 | ||
176 | f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); | 182 | rc = __frwr_reset_mr(ia, mw); |
177 | if (IS_ERR(f->fr_mr)) | 183 | ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); |
178 | goto out_mr_err; | 184 | if (rc) |
179 | 185 | goto out_release; | |
180 | f->fr_sg = kcalloc(depth, sizeof(*f->fr_sg), GFP_KERNEL); | ||
181 | if (!f->fr_sg) | ||
182 | goto out_list_err; | ||
183 | |||
184 | sg_init_table(f->fr_sg, depth); | ||
185 | |||
186 | init_completion(&f->fr_linv_done); | ||
187 | |||
188 | return 0; | ||
189 | 186 | ||
190 | out_mr_err: | 187 | rpcrdma_put_mw(r_xprt, mw); |
191 | rc = PTR_ERR(f->fr_mr); | 188 | r_xprt->rx_stats.mrs_recovered++; |
192 | dprintk("RPC: %s: ib_alloc_mr status %i\n", | 189 | return; |
193 | __func__, rc); | ||
194 | return rc; | ||
195 | 190 | ||
196 | out_list_err: | 191 | out_release: |
197 | rc = -ENOMEM; | 192 | pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); |
198 | dprintk("RPC: %s: sg allocation failure\n", | 193 | r_xprt->rx_stats.mrs_orphaned++; |
199 | __func__); | ||
200 | ib_dereg_mr(f->fr_mr); | ||
201 | return rc; | ||
202 | } | ||
203 | 194 | ||
204 | static void | 195 | spin_lock(&r_xprt->rx_buf.rb_mwlock); |
205 | __frwr_release(struct rpcrdma_mw *r) | 196 | list_del(&mw->mw_all); |
206 | { | 197 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); |
207 | int rc; | ||
208 | 198 | ||
209 | rc = ib_dereg_mr(r->frmr.fr_mr); | 199 | frwr_op_release_mr(mw); |
210 | if (rc) | ||
211 | dprintk("RPC: %s: ib_dereg_mr status %i\n", | ||
212 | __func__, rc); | ||
213 | kfree(r->frmr.fr_sg); | ||
214 | } | 200 | } |
215 | 201 | ||
216 | static int | 202 | static int |
@@ -346,57 +332,14 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | |||
346 | complete_all(&frmr->fr_linv_done); | 332 | complete_all(&frmr->fr_linv_done); |
347 | } | 333 | } |
348 | 334 | ||
349 | static int | 335 | /* Post a REG_MR Work Request to register a memory region |
350 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | ||
351 | { | ||
352 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
353 | struct ib_device *device = r_xprt->rx_ia.ri_device; | ||
354 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
355 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
356 | int i; | ||
357 | |||
358 | spin_lock_init(&buf->rb_mwlock); | ||
359 | INIT_LIST_HEAD(&buf->rb_mws); | ||
360 | INIT_LIST_HEAD(&buf->rb_all); | ||
361 | |||
362 | i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1); | ||
363 | i += 2; /* head + tail */ | ||
364 | i *= buf->rb_max_requests; /* one set for each RPC slot */ | ||
365 | dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); | ||
366 | |||
367 | while (i--) { | ||
368 | struct rpcrdma_mw *r; | ||
369 | int rc; | ||
370 | |||
371 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
372 | if (!r) | ||
373 | return -ENOMEM; | ||
374 | |||
375 | rc = __frwr_init(r, pd, device, depth); | ||
376 | if (rc) { | ||
377 | kfree(r); | ||
378 | return rc; | ||
379 | } | ||
380 | |||
381 | r->mw_xprt = r_xprt; | ||
382 | list_add(&r->mw_list, &buf->rb_mws); | ||
383 | list_add(&r->mw_all, &buf->rb_all); | ||
384 | } | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | /* Post a FAST_REG Work Request to register a memory region | ||
390 | * for remote access via RDMA READ or RDMA WRITE. | 336 | * for remote access via RDMA READ or RDMA WRITE. |
391 | */ | 337 | */ |
392 | static int | 338 | static int |
393 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 339 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
394 | int nsegs, bool writing) | 340 | int nsegs, bool writing, struct rpcrdma_mw **out) |
395 | { | 341 | { |
396 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 342 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
397 | struct ib_device *device = ia->ri_device; | ||
398 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
399 | struct rpcrdma_mr_seg *seg1 = seg; | ||
400 | struct rpcrdma_mw *mw; | 343 | struct rpcrdma_mw *mw; |
401 | struct rpcrdma_frmr *frmr; | 344 | struct rpcrdma_frmr *frmr; |
402 | struct ib_mr *mr; | 345 | struct ib_mr *mr; |
@@ -405,14 +348,13 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
405 | int rc, i, n, dma_nents; | 348 | int rc, i, n, dma_nents; |
406 | u8 key; | 349 | u8 key; |
407 | 350 | ||
408 | mw = seg1->rl_mw; | 351 | mw = NULL; |
409 | seg1->rl_mw = NULL; | ||
410 | do { | 352 | do { |
411 | if (mw) | 353 | if (mw) |
412 | __frwr_queue_recovery(mw); | 354 | rpcrdma_defer_mr_recovery(mw); |
413 | mw = rpcrdma_get_mw(r_xprt); | 355 | mw = rpcrdma_get_mw(r_xprt); |
414 | if (!mw) | 356 | if (!mw) |
415 | return -ENOMEM; | 357 | return -ENOBUFS; |
416 | } while (mw->frmr.fr_state != FRMR_IS_INVALID); | 358 | } while (mw->frmr.fr_state != FRMR_IS_INVALID); |
417 | frmr = &mw->frmr; | 359 | frmr = &mw->frmr; |
418 | frmr->fr_state = FRMR_IS_VALID; | 360 | frmr->fr_state = FRMR_IS_VALID; |
@@ -421,15 +363,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
421 | 363 | ||
422 | if (nsegs > ia->ri_max_frmr_depth) | 364 | if (nsegs > ia->ri_max_frmr_depth) |
423 | nsegs = ia->ri_max_frmr_depth; | 365 | nsegs = ia->ri_max_frmr_depth; |
424 | |||
425 | for (i = 0; i < nsegs;) { | 366 | for (i = 0; i < nsegs;) { |
426 | if (seg->mr_page) | 367 | if (seg->mr_page) |
427 | sg_set_page(&frmr->fr_sg[i], | 368 | sg_set_page(&mw->mw_sg[i], |
428 | seg->mr_page, | 369 | seg->mr_page, |
429 | seg->mr_len, | 370 | seg->mr_len, |
430 | offset_in_page(seg->mr_offset)); | 371 | offset_in_page(seg->mr_offset)); |
431 | else | 372 | else |
432 | sg_set_buf(&frmr->fr_sg[i], seg->mr_offset, | 373 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, |
433 | seg->mr_len); | 374 | seg->mr_len); |
434 | 375 | ||
435 | ++seg; | 376 | ++seg; |
@@ -440,26 +381,22 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
440 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 381 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
441 | break; | 382 | break; |
442 | } | 383 | } |
443 | frmr->fr_nents = i; | 384 | mw->mw_nents = i; |
444 | frmr->fr_dir = direction; | 385 | mw->mw_dir = rpcrdma_data_dir(writing); |
445 | 386 | if (i == 0) | |
446 | dma_nents = ib_dma_map_sg(device, frmr->fr_sg, frmr->fr_nents, direction); | 387 | goto out_dmamap_err; |
447 | if (!dma_nents) { | ||
448 | pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", | ||
449 | __func__, frmr->fr_sg, frmr->fr_nents); | ||
450 | return -ENOMEM; | ||
451 | } | ||
452 | 388 | ||
453 | n = ib_map_mr_sg(mr, frmr->fr_sg, frmr->fr_nents, NULL, PAGE_SIZE); | 389 | dma_nents = ib_dma_map_sg(ia->ri_device, |
454 | if (unlikely(n != frmr->fr_nents)) { | 390 | mw->mw_sg, mw->mw_nents, mw->mw_dir); |
455 | pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", | 391 | if (!dma_nents) |
456 | __func__, frmr->fr_mr, n, frmr->fr_nents); | 392 | goto out_dmamap_err; |
457 | rc = n < 0 ? n : -EINVAL; | 393 | |
458 | goto out_senderr; | 394 | n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); |
459 | } | 395 | if (unlikely(n != mw->mw_nents)) |
396 | goto out_mapmr_err; | ||
460 | 397 | ||
461 | dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", | 398 | dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", |
462 | __func__, mw, frmr->fr_nents, mr->length); | 399 | __func__, mw, mw->mw_nents, mr->length); |
463 | 400 | ||
464 | key = (u8)(mr->rkey & 0x000000FF); | 401 | key = (u8)(mr->rkey & 0x000000FF); |
465 | ib_update_fast_reg_key(mr, ++key); | 402 | ib_update_fast_reg_key(mr, ++key); |
@@ -481,24 +418,34 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
481 | if (rc) | 418 | if (rc) |
482 | goto out_senderr; | 419 | goto out_senderr; |
483 | 420 | ||
484 | seg1->rl_mw = mw; | 421 | mw->mw_handle = mr->rkey; |
485 | seg1->mr_rkey = mr->rkey; | 422 | mw->mw_length = mr->length; |
486 | seg1->mr_base = mr->iova; | 423 | mw->mw_offset = mr->iova; |
487 | seg1->mr_nsegs = frmr->fr_nents; | 424 | |
488 | seg1->mr_len = mr->length; | 425 | *out = mw; |
426 | return mw->mw_nents; | ||
489 | 427 | ||
490 | return frmr->fr_nents; | 428 | out_dmamap_err: |
429 | pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n", | ||
430 | mw->mw_sg, mw->mw_nents); | ||
431 | rpcrdma_defer_mr_recovery(mw); | ||
432 | return -EIO; | ||
433 | |||
434 | out_mapmr_err: | ||
435 | pr_err("rpcrdma: failed to map mr %p (%u/%u)\n", | ||
436 | frmr->fr_mr, n, mw->mw_nents); | ||
437 | rpcrdma_defer_mr_recovery(mw); | ||
438 | return -EIO; | ||
491 | 439 | ||
492 | out_senderr: | 440 | out_senderr: |
493 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | 441 | pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); |
494 | __frwr_queue_recovery(mw); | 442 | rpcrdma_defer_mr_recovery(mw); |
495 | return rc; | 443 | return -ENOTCONN; |
496 | } | 444 | } |
497 | 445 | ||
498 | static struct ib_send_wr * | 446 | static struct ib_send_wr * |
499 | __frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg) | 447 | __frwr_prepare_linv_wr(struct rpcrdma_mw *mw) |
500 | { | 448 | { |
501 | struct rpcrdma_mw *mw = seg->rl_mw; | ||
502 | struct rpcrdma_frmr *f = &mw->frmr; | 449 | struct rpcrdma_frmr *f = &mw->frmr; |
503 | struct ib_send_wr *invalidate_wr; | 450 | struct ib_send_wr *invalidate_wr; |
504 | 451 | ||
@@ -518,16 +465,16 @@ __frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg) | |||
518 | * | 465 | * |
519 | * Sleeps until it is safe for the host CPU to access the | 466 | * Sleeps until it is safe for the host CPU to access the |
520 | * previously mapped memory regions. | 467 | * previously mapped memory regions. |
468 | * | ||
469 | * Caller ensures that req->rl_registered is not empty. | ||
521 | */ | 470 | */ |
522 | static void | 471 | static void |
523 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | 472 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) |
524 | { | 473 | { |
525 | struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; | 474 | struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; |
526 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 475 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
527 | struct rpcrdma_mr_seg *seg; | 476 | struct rpcrdma_mw *mw, *tmp; |
528 | unsigned int i, nchunks; | ||
529 | struct rpcrdma_frmr *f; | 477 | struct rpcrdma_frmr *f; |
530 | struct rpcrdma_mw *mw; | ||
531 | int rc; | 478 | int rc; |
532 | 479 | ||
533 | dprintk("RPC: %s: req %p\n", __func__, req); | 480 | dprintk("RPC: %s: req %p\n", __func__, req); |
@@ -537,22 +484,18 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
537 | * Chain the LOCAL_INV Work Requests and post them with | 484 | * Chain the LOCAL_INV Work Requests and post them with |
538 | * a single ib_post_send() call. | 485 | * a single ib_post_send() call. |
539 | */ | 486 | */ |
487 | f = NULL; | ||
540 | invalidate_wrs = pos = prev = NULL; | 488 | invalidate_wrs = pos = prev = NULL; |
541 | seg = NULL; | 489 | list_for_each_entry(mw, &req->rl_registered, mw_list) { |
542 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | 490 | pos = __frwr_prepare_linv_wr(mw); |
543 | seg = &req->rl_segments[i]; | ||
544 | |||
545 | pos = __frwr_prepare_linv_wr(seg); | ||
546 | 491 | ||
547 | if (!invalidate_wrs) | 492 | if (!invalidate_wrs) |
548 | invalidate_wrs = pos; | 493 | invalidate_wrs = pos; |
549 | else | 494 | else |
550 | prev->next = pos; | 495 | prev->next = pos; |
551 | prev = pos; | 496 | prev = pos; |
552 | 497 | f = &mw->frmr; | |
553 | i += seg->mr_nsegs; | ||
554 | } | 498 | } |
555 | f = &seg->rl_mw->frmr; | ||
556 | 499 | ||
557 | /* Strong send queue ordering guarantees that when the | 500 | /* Strong send queue ordering guarantees that when the |
558 | * last WR in the chain completes, all WRs in the chain | 501 | * last WR in the chain completes, all WRs in the chain |
@@ -577,39 +520,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
577 | * them to the free MW list. | 520 | * them to the free MW list. |
578 | */ | 521 | */ |
579 | unmap: | 522 | unmap: |
580 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | 523 | list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { |
581 | seg = &req->rl_segments[i]; | 524 | list_del_init(&mw->mw_list); |
582 | mw = seg->rl_mw; | 525 | ib_dma_unmap_sg(ia->ri_device, |
583 | seg->rl_mw = NULL; | 526 | mw->mw_sg, mw->mw_nents, mw->mw_dir); |
584 | |||
585 | ib_dma_unmap_sg(ia->ri_device, f->fr_sg, f->fr_nents, | ||
586 | f->fr_dir); | ||
587 | rpcrdma_put_mw(r_xprt, mw); | 527 | rpcrdma_put_mw(r_xprt, mw); |
588 | |||
589 | i += seg->mr_nsegs; | ||
590 | seg->mr_nsegs = 0; | ||
591 | } | 528 | } |
592 | |||
593 | req->rl_nchunks = 0; | ||
594 | return; | 529 | return; |
595 | 530 | ||
596 | reset_mrs: | 531 | reset_mrs: |
597 | pr_warn("%s: ib_post_send failed %i\n", __func__, rc); | 532 | pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); |
533 | rdma_disconnect(ia->ri_id); | ||
598 | 534 | ||
599 | /* Find and reset the MRs in the LOCAL_INV WRs that did not | 535 | /* Find and reset the MRs in the LOCAL_INV WRs that did not |
600 | * get posted. This is synchronous, and slow. | 536 | * get posted. This is synchronous, and slow. |
601 | */ | 537 | */ |
602 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | 538 | list_for_each_entry(mw, &req->rl_registered, mw_list) { |
603 | seg = &req->rl_segments[i]; | ||
604 | mw = seg->rl_mw; | ||
605 | f = &mw->frmr; | 539 | f = &mw->frmr; |
606 | |||
607 | if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) { | 540 | if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) { |
608 | __frwr_reset_mr(ia, mw); | 541 | __frwr_reset_mr(ia, mw); |
609 | bad_wr = bad_wr->next; | 542 | bad_wr = bad_wr->next; |
610 | } | 543 | } |
611 | |||
612 | i += seg->mr_nsegs; | ||
613 | } | 544 | } |
614 | goto unmap; | 545 | goto unmap; |
615 | } | 546 | } |
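The unmap path above walks req->rl_registered with list_for_each_entry_safe() because each rpcrdma_mw is unlinked (list_del_init) while the walk is in progress; the reset_mrs path can keep the plain iterator since it only resets entries in place. A minimal userspace sketch of the same capture-next-before-freeing idiom, using a simplified singly linked list and illustrative names rather than the kernel's list_head:

    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-in for the intrusive list of rpcrdma_mw entries. */
    struct mw {
        struct mw *next;
        int handle;
    };

    int main(void)
    {
        struct mw *head = NULL, *pos, *next;

        /* Build a short "registered" list. */
        for (int i = 3; i > 0; i--) {
            pos = malloc(sizeof(*pos));
            pos->handle = i;
            pos->next = head;
            head = pos;
        }

        /* Safe traversal: remember ->next before releasing the node,
         * which is what list_for_each_entry_safe() does for you. */
        for (pos = head; pos; pos = next) {
            next = pos->next;
            printf("unmapping and releasing MW %d\n", pos->handle);
            free(pos);
        }
        head = NULL;
        return 0;
    }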
@@ -621,38 +552,17 @@ static void | |||
621 | frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | 552 | frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
622 | bool sync) | 553 | bool sync) |
623 | { | 554 | { |
624 | struct rpcrdma_mr_seg *seg; | ||
625 | struct rpcrdma_mw *mw; | 555 | struct rpcrdma_mw *mw; |
626 | unsigned int i; | ||
627 | 556 | ||
628 | for (i = 0; req->rl_nchunks; req->rl_nchunks--) { | 557 | while (!list_empty(&req->rl_registered)) { |
629 | seg = &req->rl_segments[i]; | 558 | mw = list_first_entry(&req->rl_registered, |
630 | mw = seg->rl_mw; | 559 | struct rpcrdma_mw, mw_list); |
560 | list_del_init(&mw->mw_list); | ||
631 | 561 | ||
632 | if (sync) | 562 | if (sync) |
633 | __frwr_reset_and_unmap(r_xprt, mw); | 563 | frwr_op_recover_mr(mw); |
634 | else | 564 | else |
635 | __frwr_queue_recovery(mw); | 565 | rpcrdma_defer_mr_recovery(mw); |
636 | |||
637 | i += seg->mr_nsegs; | ||
638 | seg->mr_nsegs = 0; | ||
639 | seg->rl_mw = NULL; | ||
640 | } | ||
641 | } | ||
642 | |||
643 | static void | ||
644 | frwr_op_destroy(struct rpcrdma_buffer *buf) | ||
645 | { | ||
646 | struct rpcrdma_mw *r; | ||
647 | |||
648 | /* Ensure stale MWs for "buf" are no longer in flight */ | ||
649 | flush_workqueue(frwr_recovery_wq); | ||
650 | |||
651 | while (!list_empty(&buf->rb_all)) { | ||
652 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
653 | list_del(&r->mw_all); | ||
654 | __frwr_release(r); | ||
655 | kfree(r); | ||
656 | } | 566 | } |
657 | } | 567 | } |
658 | 568 | ||
@@ -660,9 +570,10 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | |||
660 | .ro_map = frwr_op_map, | 570 | .ro_map = frwr_op_map, |
661 | .ro_unmap_sync = frwr_op_unmap_sync, | 571 | .ro_unmap_sync = frwr_op_unmap_sync, |
662 | .ro_unmap_safe = frwr_op_unmap_safe, | 572 | .ro_unmap_safe = frwr_op_unmap_safe, |
573 | .ro_recover_mr = frwr_op_recover_mr, | ||
663 | .ro_open = frwr_op_open, | 574 | .ro_open = frwr_op_open, |
664 | .ro_maxpages = frwr_op_maxpages, | 575 | .ro_maxpages = frwr_op_maxpages, |
665 | .ro_init = frwr_op_init, | 576 | .ro_init_mr = frwr_op_init_mr, |
666 | .ro_destroy = frwr_op_destroy, | 577 | .ro_release_mr = frwr_op_release_mr, |
667 | .ro_displayname = "frwr", | 578 | .ro_displayname = "frwr", |
668 | }; | 579 | }; |
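rpcrdma_frwr_memreg_ops is one concrete instance of the rpcrdma_memreg_ops vtable; generic code reaches the registration strategy only through ia->ri_ops (see the ro_map call sites in rpc_rdma.c below), which is why FRWR and FMR can be swapped, and new hooks such as ro_recover_mr added, without touching the callers. A standalone sketch of the same function-pointer-table pattern, with purely illustrative names:

    #include <stdio.h>

    struct memreg_ops {
        int  (*ro_map)(int nsegs);
        void (*ro_release)(void);
        const char *ro_displayname;
    };

    static int demo_map(int nsegs)
    {
        printf("mapping %d segments\n", nsegs);
        return nsegs;
    }

    static void demo_release(void)
    {
        printf("releasing MR\n");
    }

    static const struct memreg_ops demo_ops = {
        .ro_map         = demo_map,
        .ro_release     = demo_release,
        .ro_displayname = "demo",
    };

    int main(void)
    {
        const struct memreg_ops *ops = &demo_ops;   /* like ia->ri_ops */

        printf("strategy: %s\n", ops->ro_displayname);
        ops->ro_map(4);
        ops->ro_release();
        return 0;
    }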
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c deleted file mode 100644 index 3750596cc432..000000000000 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ /dev/null | |||
@@ -1,122 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* No-op chunk preparation. All client memory is pre-registered. | ||
7 | * Sometimes referred to as ALLPHYSICAL mode. | ||
8 | * | ||
9 | * Physical registration is simple because all client memory is | ||
10 | * pre-registered and never deregistered. This mode is good for | ||
11 | * adapter bring up, but is considered not safe: the server is | ||
12 | * trusted not to abuse its access to client memory not involved | ||
13 | * in RDMA I/O. | ||
14 | */ | ||
15 | |||
16 | #include "xprt_rdma.h" | ||
17 | |||
18 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
19 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
20 | #endif | ||
21 | |||
22 | static int | ||
23 | physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
24 | struct rpcrdma_create_data_internal *cdata) | ||
25 | { | ||
26 | struct ib_mr *mr; | ||
27 | |||
28 | /* Obtain an rkey to use for RPC data payloads. | ||
29 | */ | ||
30 | mr = ib_get_dma_mr(ia->ri_pd, | ||
31 | IB_ACCESS_LOCAL_WRITE | | ||
32 | IB_ACCESS_REMOTE_WRITE | | ||
33 | IB_ACCESS_REMOTE_READ); | ||
34 | if (IS_ERR(mr)) { | ||
35 | pr_err("%s: ib_get_dma_mr for failed with %lX\n", | ||
36 | __func__, PTR_ERR(mr)); | ||
37 | return -ENOMEM; | ||
38 | } | ||
39 | ia->ri_dma_mr = mr; | ||
40 | |||
41 | rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int, | ||
42 | RPCRDMA_MAX_DATA_SEGS, | ||
43 | RPCRDMA_MAX_HDR_SEGS)); | ||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | /* PHYSICAL memory registration conveys one page per chunk segment. | ||
48 | */ | ||
49 | static size_t | ||
50 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
51 | { | ||
52 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
53 | RPCRDMA_MAX_HDR_SEGS); | ||
54 | } | ||
55 | |||
56 | static int | ||
57 | physical_op_init(struct rpcrdma_xprt *r_xprt) | ||
58 | { | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | /* The client's physical memory is already exposed for | ||
63 | * remote access via RDMA READ or RDMA WRITE. | ||
64 | */ | ||
65 | static int | ||
66 | physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
67 | int nsegs, bool writing) | ||
68 | { | ||
69 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
70 | |||
71 | rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); | ||
72 | seg->mr_rkey = ia->ri_dma_mr->rkey; | ||
73 | seg->mr_base = seg->mr_dma; | ||
74 | return 1; | ||
75 | } | ||
76 | |||
77 | /* DMA unmap all memory regions that were mapped for "req". | ||
78 | */ | ||
79 | static void | ||
80 | physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | ||
81 | { | ||
82 | struct ib_device *device = r_xprt->rx_ia.ri_device; | ||
83 | unsigned int i; | ||
84 | |||
85 | for (i = 0; req->rl_nchunks; --req->rl_nchunks) | ||
86 | rpcrdma_unmap_one(device, &req->rl_segments[i++]); | ||
87 | } | ||
88 | |||
89 | /* Use a slow, safe mechanism to invalidate all memory regions | ||
90 | * that were registered for "req". | ||
91 | * | ||
92 | * For physical memory registration, there is no good way to | ||
93 | * fence a single MR that has been advertised to the server. The | ||
94 | * client has already handed the server an R_key that cannot be | ||
95 | * invalidated and is shared by all MRs on this connection. | ||
96 | * Tearing down the PD might be the only safe choice, but it's | ||
97 | * not clear that a freshly acquired DMA R_key would be different | ||
98 | * than the one used by the PD that was just destroyed. | ||
99 | * FIXME. | ||
100 | */ | ||
101 | static void | ||
102 | physical_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | ||
103 | bool sync) | ||
104 | { | ||
105 | physical_op_unmap_sync(r_xprt, req); | ||
106 | } | ||
107 | |||
108 | static void | ||
109 | physical_op_destroy(struct rpcrdma_buffer *buf) | ||
110 | { | ||
111 | } | ||
112 | |||
113 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | ||
114 | .ro_map = physical_op_map, | ||
115 | .ro_unmap_sync = physical_op_unmap_sync, | ||
116 | .ro_unmap_safe = physical_op_unmap_safe, | ||
117 | .ro_open = physical_op_open, | ||
118 | .ro_maxpages = physical_op_maxpages, | ||
119 | .ro_init = physical_op_init, | ||
120 | .ro_destroy = physical_op_destroy, | ||
121 | .ro_displayname = "physical", | ||
122 | }; | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 35a81096e83d..a47f170b20ef 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -196,8 +196,7 @@ rpcrdma_tail_pullup(struct xdr_buf *buf) | |||
196 | * MR when they can. | 196 | * MR when they can. |
197 | */ | 197 | */ |
198 | static int | 198 | static int |
199 | rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, | 199 | rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) |
200 | int n, int nsegs) | ||
201 | { | 200 | { |
202 | size_t page_offset; | 201 | size_t page_offset; |
203 | u32 remaining; | 202 | u32 remaining; |
@@ -206,7 +205,7 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, | |||
206 | base = vec->iov_base; | 205 | base = vec->iov_base; |
207 | page_offset = offset_in_page(base); | 206 | page_offset = offset_in_page(base); |
208 | remaining = vec->iov_len; | 207 | remaining = vec->iov_len; |
209 | while (remaining && n < nsegs) { | 208 | while (remaining && n < RPCRDMA_MAX_SEGS) { |
210 | seg[n].mr_page = NULL; | 209 | seg[n].mr_page = NULL; |
211 | seg[n].mr_offset = base; | 210 | seg[n].mr_offset = base; |
212 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining); | 211 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining); |
@@ -230,34 +229,34 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, | |||
230 | 229 | ||
231 | static int | 230 | static int |
232 | rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | 231 | rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, |
233 | enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) | 232 | enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg) |
234 | { | 233 | { |
235 | int len, n = 0, p; | 234 | int len, n, p, page_base; |
236 | int page_base; | ||
237 | struct page **ppages; | 235 | struct page **ppages; |
238 | 236 | ||
237 | n = 0; | ||
239 | if (pos == 0) { | 238 | if (pos == 0) { |
240 | n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n, nsegs); | 239 | n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n); |
241 | if (n == nsegs) | 240 | if (n == RPCRDMA_MAX_SEGS) |
242 | return -EIO; | 241 | goto out_overflow; |
243 | } | 242 | } |
244 | 243 | ||
245 | len = xdrbuf->page_len; | 244 | len = xdrbuf->page_len; |
246 | ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); | 245 | ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); |
247 | page_base = xdrbuf->page_base & ~PAGE_MASK; | 246 | page_base = xdrbuf->page_base & ~PAGE_MASK; |
248 | p = 0; | 247 | p = 0; |
249 | while (len && n < nsegs) { | 248 | while (len && n < RPCRDMA_MAX_SEGS) { |
250 | if (!ppages[p]) { | 249 | if (!ppages[p]) { |
251 | /* alloc the pagelist for receiving buffer */ | 250 | /* alloc the pagelist for receiving buffer */ |
252 | ppages[p] = alloc_page(GFP_ATOMIC); | 251 | ppages[p] = alloc_page(GFP_ATOMIC); |
253 | if (!ppages[p]) | 252 | if (!ppages[p]) |
254 | return -ENOMEM; | 253 | return -EAGAIN; |
255 | } | 254 | } |
256 | seg[n].mr_page = ppages[p]; | 255 | seg[n].mr_page = ppages[p]; |
257 | seg[n].mr_offset = (void *)(unsigned long) page_base; | 256 | seg[n].mr_offset = (void *)(unsigned long) page_base; |
258 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); | 257 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); |
259 | if (seg[n].mr_len > PAGE_SIZE) | 258 | if (seg[n].mr_len > PAGE_SIZE) |
260 | return -EIO; | 259 | goto out_overflow; |
261 | len -= seg[n].mr_len; | 260 | len -= seg[n].mr_len; |
262 | ++n; | 261 | ++n; |
263 | ++p; | 262 | ++p; |
@@ -265,8 +264,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
265 | } | 264 | } |
266 | 265 | ||
267 | /* Message overflows the seg array */ | 266 | /* Message overflows the seg array */ |
268 | if (len && n == nsegs) | 267 | if (len && n == RPCRDMA_MAX_SEGS) |
269 | return -EIO; | 268 | goto out_overflow; |
270 | 269 | ||
271 | /* When encoding the read list, the tail is always sent inline */ | 270 | /* When encoding the read list, the tail is always sent inline */ |
272 | if (type == rpcrdma_readch) | 271 | if (type == rpcrdma_readch) |
@@ -277,20 +276,24 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
277 | * xdr pad bytes, saving the server an RDMA operation. */ | 276 | * xdr pad bytes, saving the server an RDMA operation. */ |
278 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) | 277 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) |
279 | return n; | 278 | return n; |
280 | n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n, nsegs); | 279 | n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); |
281 | if (n == nsegs) | 280 | if (n == RPCRDMA_MAX_SEGS) |
282 | return -EIO; | 281 | goto out_overflow; |
283 | } | 282 | } |
284 | 283 | ||
285 | return n; | 284 | return n; |
285 | |||
286 | out_overflow: | ||
287 | pr_err("rpcrdma: segment array overflow\n"); | ||
288 | return -EIO; | ||
286 | } | 289 | } |
287 | 290 | ||
288 | static inline __be32 * | 291 | static inline __be32 * |
289 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr_seg *seg) | 292 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) |
290 | { | 293 | { |
291 | *iptr++ = cpu_to_be32(seg->mr_rkey); | 294 | *iptr++ = cpu_to_be32(mw->mw_handle); |
292 | *iptr++ = cpu_to_be32(seg->mr_len); | 295 | *iptr++ = cpu_to_be32(mw->mw_length); |
293 | return xdr_encode_hyper(iptr, seg->mr_base); | 296 | return xdr_encode_hyper(iptr, mw->mw_offset); |
294 | } | 297 | } |
295 | 298 | ||
296 | /* XDR-encode the Read list. Supports encoding a list of read | 299 | /* XDR-encode the Read list. Supports encoding a list of read |
@@ -310,7 +313,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, | |||
310 | struct rpcrdma_req *req, struct rpc_rqst *rqst, | 313 | struct rpcrdma_req *req, struct rpc_rqst *rqst, |
311 | __be32 *iptr, enum rpcrdma_chunktype rtype) | 314 | __be32 *iptr, enum rpcrdma_chunktype rtype) |
312 | { | 315 | { |
313 | struct rpcrdma_mr_seg *seg = req->rl_nextseg; | 316 | struct rpcrdma_mr_seg *seg; |
317 | struct rpcrdma_mw *mw; | ||
314 | unsigned int pos; | 318 | unsigned int pos; |
315 | int n, nsegs; | 319 | int n, nsegs; |
316 | 320 | ||
@@ -322,15 +326,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, | |||
322 | pos = rqst->rq_snd_buf.head[0].iov_len; | 326 | pos = rqst->rq_snd_buf.head[0].iov_len; |
323 | if (rtype == rpcrdma_areadch) | 327 | if (rtype == rpcrdma_areadch) |
324 | pos = 0; | 328 | pos = 0; |
325 | nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, | 329 | seg = req->rl_segments; |
326 | RPCRDMA_MAX_SEGS - req->rl_nchunks); | 330 | nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg); |
327 | if (nsegs < 0) | 331 | if (nsegs < 0) |
328 | return ERR_PTR(nsegs); | 332 | return ERR_PTR(nsegs); |
329 | 333 | ||
330 | do { | 334 | do { |
331 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, false); | 335 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
332 | if (n <= 0) | 336 | false, &mw); |
337 | if (n < 0) | ||
333 | return ERR_PTR(n); | 338 | return ERR_PTR(n); |
339 | list_add(&mw->mw_list, &req->rl_registered); | ||
334 | 340 | ||
335 | *iptr++ = xdr_one; /* item present */ | 341 | *iptr++ = xdr_one; /* item present */ |
336 | 342 | ||
@@ -338,20 +344,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, | |||
338 | * have the same "position". | 344 | * have the same "position". |
339 | */ | 345 | */ |
340 | *iptr++ = cpu_to_be32(pos); | 346 | *iptr++ = cpu_to_be32(pos); |
341 | iptr = xdr_encode_rdma_segment(iptr, seg); | 347 | iptr = xdr_encode_rdma_segment(iptr, mw); |
342 | 348 | ||
343 | dprintk("RPC: %5u %s: read segment pos %u " | 349 | dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", |
344 | "%d@0x%016llx:0x%08x (%s)\n", | ||
345 | rqst->rq_task->tk_pid, __func__, pos, | 350 | rqst->rq_task->tk_pid, __func__, pos, |
346 | seg->mr_len, (unsigned long long)seg->mr_base, | 351 | mw->mw_length, (unsigned long long)mw->mw_offset, |
347 | seg->mr_rkey, n < nsegs ? "more" : "last"); | 352 | mw->mw_handle, n < nsegs ? "more" : "last"); |
348 | 353 | ||
349 | r_xprt->rx_stats.read_chunk_count++; | 354 | r_xprt->rx_stats.read_chunk_count++; |
350 | req->rl_nchunks++; | ||
351 | seg += n; | 355 | seg += n; |
352 | nsegs -= n; | 356 | nsegs -= n; |
353 | } while (nsegs); | 357 | } while (nsegs); |
354 | req->rl_nextseg = seg; | ||
355 | 358 | ||
356 | /* Finish Read list */ | 359 | /* Finish Read list */ |
357 | *iptr++ = xdr_zero; /* Next item not present */ | 360 | *iptr++ = xdr_zero; /* Next item not present */ |
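On the wire, each segment emitted by xdr_encode_rdma_segment() is three XDR fields taken from the rpcrdma_mw: a 32-bit handle (the rkey), a 32-bit length, and a 64-bit offset, all in big-endian byte order (xdr_encode_hyper covers the 64-bit half). A self-contained sketch of that encoding, using hand-rolled byte swaps and made-up example values rather than the kernel's XDR helpers:

    #include <stdint.h>
    #include <stdio.h>

    /* Emit a value in XDR (big-endian) byte order. */
    static uint8_t *put_be32(uint8_t *p, uint32_t v)
    {
        for (int shift = 24; shift >= 0; shift -= 8)
            *p++ = (uint8_t)(v >> shift);
        return p;
    }

    static uint8_t *put_be64(uint8_t *p, uint64_t v)
    {
        for (int shift = 56; shift >= 0; shift -= 8)
            *p++ = (uint8_t)(v >> shift);
        return p;
    }

    int main(void)
    {
        uint8_t buf[16], *p = buf;
        uint32_t handle = 0x1234abcd;      /* MR rkey */
        uint32_t length = 8192;            /* bytes advertised */
        uint64_t offset = 0x7f00deadbeefULL;

        p = put_be32(p, handle);
        p = put_be32(p, length);
        p = put_be64(p, offset);           /* xdr_encode_hyper equivalent */

        printf("encoded %d bytes:", (int)(p - buf));
        for (int i = 0; i < (int)(p - buf); i++)
            printf(" %02x", buf[i]);
        printf("\n");
        return 0;
    }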
@@ -375,7 +378,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
375 | struct rpc_rqst *rqst, __be32 *iptr, | 378 | struct rpc_rqst *rqst, __be32 *iptr, |
376 | enum rpcrdma_chunktype wtype) | 379 | enum rpcrdma_chunktype wtype) |
377 | { | 380 | { |
378 | struct rpcrdma_mr_seg *seg = req->rl_nextseg; | 381 | struct rpcrdma_mr_seg *seg; |
382 | struct rpcrdma_mw *mw; | ||
379 | int n, nsegs, nchunks; | 383 | int n, nsegs, nchunks; |
380 | __be32 *segcount; | 384 | __be32 *segcount; |
381 | 385 | ||
@@ -384,10 +388,10 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
384 | return iptr; | 388 | return iptr; |
385 | } | 389 | } |
386 | 390 | ||
391 | seg = req->rl_segments; | ||
387 | nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, | 392 | nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, |
388 | rqst->rq_rcv_buf.head[0].iov_len, | 393 | rqst->rq_rcv_buf.head[0].iov_len, |
389 | wtype, seg, | 394 | wtype, seg); |
390 | RPCRDMA_MAX_SEGS - req->rl_nchunks); | ||
391 | if (nsegs < 0) | 395 | if (nsegs < 0) |
392 | return ERR_PTR(nsegs); | 396 | return ERR_PTR(nsegs); |
393 | 397 | ||
@@ -396,26 +400,25 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
396 | 400 | ||
397 | nchunks = 0; | 401 | nchunks = 0; |
398 | do { | 402 | do { |
399 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true); | 403 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
400 | if (n <= 0) | 404 | true, &mw); |
405 | if (n < 0) | ||
401 | return ERR_PTR(n); | 406 | return ERR_PTR(n); |
407 | list_add(&mw->mw_list, &req->rl_registered); | ||
402 | 408 | ||
403 | iptr = xdr_encode_rdma_segment(iptr, seg); | 409 | iptr = xdr_encode_rdma_segment(iptr, mw); |
404 | 410 | ||
405 | dprintk("RPC: %5u %s: write segment " | 411 | dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", |
406 | "%d@0x016%llx:0x%08x (%s)\n", | ||
407 | rqst->rq_task->tk_pid, __func__, | 412 | rqst->rq_task->tk_pid, __func__, |
408 | seg->mr_len, (unsigned long long)seg->mr_base, | 413 | mw->mw_length, (unsigned long long)mw->mw_offset, |
409 | seg->mr_rkey, n < nsegs ? "more" : "last"); | 414 | mw->mw_handle, n < nsegs ? "more" : "last"); |
410 | 415 | ||
411 | r_xprt->rx_stats.write_chunk_count++; | 416 | r_xprt->rx_stats.write_chunk_count++; |
412 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | 417 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; |
413 | req->rl_nchunks++; | ||
414 | nchunks++; | 418 | nchunks++; |
415 | seg += n; | 419 | seg += n; |
416 | nsegs -= n; | 420 | nsegs -= n; |
417 | } while (nsegs); | 421 | } while (nsegs); |
418 | req->rl_nextseg = seg; | ||
419 | 422 | ||
420 | /* Update count of segments in this Write chunk */ | 423 | /* Update count of segments in this Write chunk */ |
421 | *segcount = cpu_to_be32(nchunks); | 424 | *segcount = cpu_to_be32(nchunks); |
@@ -442,7 +445,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, | |||
442 | struct rpcrdma_req *req, struct rpc_rqst *rqst, | 445 | struct rpcrdma_req *req, struct rpc_rqst *rqst, |
443 | __be32 *iptr, enum rpcrdma_chunktype wtype) | 446 | __be32 *iptr, enum rpcrdma_chunktype wtype) |
444 | { | 447 | { |
445 | struct rpcrdma_mr_seg *seg = req->rl_nextseg; | 448 | struct rpcrdma_mr_seg *seg; |
449 | struct rpcrdma_mw *mw; | ||
446 | int n, nsegs, nchunks; | 450 | int n, nsegs, nchunks; |
447 | __be32 *segcount; | 451 | __be32 *segcount; |
448 | 452 | ||
@@ -451,8 +455,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, | |||
451 | return iptr; | 455 | return iptr; |
452 | } | 456 | } |
453 | 457 | ||
454 | nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, | 458 | seg = req->rl_segments; |
455 | RPCRDMA_MAX_SEGS - req->rl_nchunks); | 459 | nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg); |
456 | if (nsegs < 0) | 460 | if (nsegs < 0) |
457 | return ERR_PTR(nsegs); | 461 | return ERR_PTR(nsegs); |
458 | 462 | ||
@@ -461,26 +465,25 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, | |||
461 | 465 | ||
462 | nchunks = 0; | 466 | nchunks = 0; |
463 | do { | 467 | do { |
464 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true); | 468 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
465 | if (n <= 0) | 469 | true, &mw); |
470 | if (n < 0) | ||
466 | return ERR_PTR(n); | 471 | return ERR_PTR(n); |
472 | list_add(&mw->mw_list, &req->rl_registered); | ||
467 | 473 | ||
468 | iptr = xdr_encode_rdma_segment(iptr, seg); | 474 | iptr = xdr_encode_rdma_segment(iptr, mw); |
469 | 475 | ||
470 | dprintk("RPC: %5u %s: reply segment " | 476 | dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", |
471 | "%d@0x%016llx:0x%08x (%s)\n", | ||
472 | rqst->rq_task->tk_pid, __func__, | 477 | rqst->rq_task->tk_pid, __func__, |
473 | seg->mr_len, (unsigned long long)seg->mr_base, | 478 | mw->mw_length, (unsigned long long)mw->mw_offset, |
474 | seg->mr_rkey, n < nsegs ? "more" : "last"); | 479 | mw->mw_handle, n < nsegs ? "more" : "last"); |
475 | 480 | ||
476 | r_xprt->rx_stats.reply_chunk_count++; | 481 | r_xprt->rx_stats.reply_chunk_count++; |
477 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | 482 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; |
478 | req->rl_nchunks++; | ||
479 | nchunks++; | 483 | nchunks++; |
480 | seg += n; | 484 | seg += n; |
481 | nsegs -= n; | 485 | nsegs -= n; |
482 | } while (nsegs); | 486 | } while (nsegs); |
483 | req->rl_nextseg = seg; | ||
484 | 487 | ||
485 | /* Update count of segments in the Reply chunk */ | 488 | /* Update count of segments in the Reply chunk */ |
486 | *segcount = cpu_to_be32(nchunks); | 489 | *segcount = cpu_to_be32(nchunks); |
@@ -567,6 +570,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
567 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 570 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
568 | enum rpcrdma_chunktype rtype, wtype; | 571 | enum rpcrdma_chunktype rtype, wtype; |
569 | struct rpcrdma_msg *headerp; | 572 | struct rpcrdma_msg *headerp; |
573 | bool ddp_allowed; | ||
570 | ssize_t hdrlen; | 574 | ssize_t hdrlen; |
571 | size_t rpclen; | 575 | size_t rpclen; |
572 | __be32 *iptr; | 576 | __be32 *iptr; |
@@ -583,6 +587,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
583 | headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); | 587 | headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); |
584 | headerp->rm_type = rdma_msg; | 588 | headerp->rm_type = rdma_msg; |
585 | 589 | ||
590 | /* When the ULP employs a GSS flavor that guarantees integrity | ||
591 | * or privacy, direct data placement of individual data items | ||
592 | * is not allowed. | ||
593 | */ | ||
594 | ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags & | ||
595 | RPCAUTH_AUTH_DATATOUCH); | ||
596 | |||
586 | /* | 597 | /* |
587 | * Chunks needed for results? | 598 | * Chunks needed for results? |
588 | * | 599 | * |
@@ -594,7 +605,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
594 | */ | 605 | */ |
595 | if (rpcrdma_results_inline(r_xprt, rqst)) | 606 | if (rpcrdma_results_inline(r_xprt, rqst)) |
596 | wtype = rpcrdma_noch; | 607 | wtype = rpcrdma_noch; |
597 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 608 | else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) |
598 | wtype = rpcrdma_writech; | 609 | wtype = rpcrdma_writech; |
599 | else | 610 | else |
600 | wtype = rpcrdma_replych; | 611 | wtype = rpcrdma_replych; |
@@ -617,7 +628,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
617 | rtype = rpcrdma_noch; | 628 | rtype = rpcrdma_noch; |
618 | rpcrdma_inline_pullup(rqst); | 629 | rpcrdma_inline_pullup(rqst); |
619 | rpclen = rqst->rq_svec[0].iov_len; | 630 | rpclen = rqst->rq_svec[0].iov_len; |
620 | } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { | 631 | } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { |
621 | rtype = rpcrdma_readch; | 632 | rtype = rpcrdma_readch; |
622 | rpclen = rqst->rq_svec[0].iov_len; | 633 | rpclen = rqst->rq_svec[0].iov_len; |
623 | rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); | 634 | rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); |
@@ -650,8 +661,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
650 | * send a Call message with a Position Zero Read chunk and a | 661 | * send a Call message with a Position Zero Read chunk and a |
651 | * regular Read chunk at the same time. | 662 | * regular Read chunk at the same time. |
652 | */ | 663 | */ |
653 | req->rl_nchunks = 0; | ||
654 | req->rl_nextseg = req->rl_segments; | ||
655 | iptr = headerp->rm_body.rm_chunks; | 664 | iptr = headerp->rm_body.rm_chunks; |
656 | iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); | 665 | iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); |
657 | if (IS_ERR(iptr)) | 666 | if (IS_ERR(iptr)) |
@@ -690,10 +699,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
690 | out_overflow: | 699 | out_overflow: |
691 | pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", | 700 | pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", |
692 | hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); | 701 | hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); |
693 | /* Terminate this RPC. Chunks registered above will be | 702 | iptr = ERR_PTR(-EIO); |
694 | * released by xprt_release -> xprt_rmda_free . | ||
695 | */ | ||
696 | return -EIO; | ||
697 | 703 | ||
698 | out_unmap: | 704 | out_unmap: |
699 | r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); | 705 | r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); |
@@ -705,15 +711,13 @@ out_unmap: | |||
705 | * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) | 711 | * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) |
706 | */ | 712 | */ |
707 | static int | 713 | static int |
708 | rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __be32 **iptrp) | 714 | rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp) |
709 | { | 715 | { |
710 | unsigned int i, total_len; | 716 | unsigned int i, total_len; |
711 | struct rpcrdma_write_chunk *cur_wchunk; | 717 | struct rpcrdma_write_chunk *cur_wchunk; |
712 | char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); | 718 | char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); |
713 | 719 | ||
714 | i = be32_to_cpu(**iptrp); | 720 | i = be32_to_cpu(**iptrp); |
715 | if (i > max) | ||
716 | return -1; | ||
717 | cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); | 721 | cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); |
718 | total_len = 0; | 722 | total_len = 0; |
719 | while (i--) { | 723 | while (i--) { |
@@ -744,45 +748,66 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b | |||
744 | return total_len; | 748 | return total_len; |
745 | } | 749 | } |
746 | 750 | ||
747 | /* | 751 | /** |
748 | * Scatter inline received data back into provided iov's. | 752 | * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs |
753 | * @rqst: controlling RPC request | ||
754 | * @srcp: points to RPC message payload in receive buffer | ||
755 | * @copy_len: remaining length of receive buffer content | ||
756 | * @pad: Write chunk pad bytes needed (zero for pure inline) | ||
757 | * | ||
758 | * The upper layer has set the maximum number of bytes it can | ||
759 | * receive in each component of rq_rcv_buf. These values are set in | ||
760 | * the head.iov_len, page_len, tail.iov_len, and buflen fields. | ||
761 | * | ||
762 | * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in | ||
763 | * many cases this function simply updates iov_base pointers in | ||
764 | * rq_rcv_buf to point directly to the received reply data, to | ||
765 | * avoid copying reply data. | ||
766 | * | ||
767 | * Returns the count of bytes which had to be memcopied. | ||
749 | */ | 768 | */ |
750 | static void | 769 | static unsigned long |
751 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | 770 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) |
752 | { | 771 | { |
753 | int i, npages, curlen, olen; | 772 | unsigned long fixup_copy_count; |
773 | int i, npages, curlen; | ||
754 | char *destp; | 774 | char *destp; |
755 | struct page **ppages; | 775 | struct page **ppages; |
756 | int page_base; | 776 | int page_base; |
757 | 777 | ||
778 | /* The head iovec is redirected to the RPC reply message | ||
779 | * in the receive buffer, to avoid a memcopy. | ||
780 | */ | ||
781 | rqst->rq_rcv_buf.head[0].iov_base = srcp; | ||
782 | rqst->rq_private_buf.head[0].iov_base = srcp; | ||
783 | |||
784 | /* The contents of the receive buffer that follow | ||
785 | * head.iov_len bytes are copied into the page list. | ||
786 | */ | ||
758 | curlen = rqst->rq_rcv_buf.head[0].iov_len; | 787 | curlen = rqst->rq_rcv_buf.head[0].iov_len; |
759 | if (curlen > copy_len) { /* write chunk header fixup */ | 788 | if (curlen > copy_len) |
760 | curlen = copy_len; | 789 | curlen = copy_len; |
761 | rqst->rq_rcv_buf.head[0].iov_len = curlen; | ||
762 | } | ||
763 | |||
764 | dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", | 790 | dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", |
765 | __func__, srcp, copy_len, curlen); | 791 | __func__, srcp, copy_len, curlen); |
766 | |||
767 | /* Shift pointer for first receive segment only */ | ||
768 | rqst->rq_rcv_buf.head[0].iov_base = srcp; | ||
769 | srcp += curlen; | 792 | srcp += curlen; |
770 | copy_len -= curlen; | 793 | copy_len -= curlen; |
771 | 794 | ||
772 | olen = copy_len; | ||
773 | i = 0; | ||
774 | rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; | ||
775 | page_base = rqst->rq_rcv_buf.page_base; | 795 | page_base = rqst->rq_rcv_buf.page_base; |
776 | ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT); | 796 | ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT); |
777 | page_base &= ~PAGE_MASK; | 797 | page_base &= ~PAGE_MASK; |
778 | 798 | fixup_copy_count = 0; | |
779 | if (copy_len && rqst->rq_rcv_buf.page_len) { | 799 | if (copy_len && rqst->rq_rcv_buf.page_len) { |
780 | npages = PAGE_ALIGN(page_base + | 800 | int pagelist_len; |
781 | rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; | 801 | |
782 | for (; i < npages; i++) { | 802 | pagelist_len = rqst->rq_rcv_buf.page_len; |
803 | if (pagelist_len > copy_len) | ||
804 | pagelist_len = copy_len; | ||
805 | npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT; | ||
806 | for (i = 0; i < npages; i++) { | ||
783 | curlen = PAGE_SIZE - page_base; | 807 | curlen = PAGE_SIZE - page_base; |
784 | if (curlen > copy_len) | 808 | if (curlen > pagelist_len) |
785 | curlen = copy_len; | 809 | curlen = pagelist_len; |
810 | |||
786 | dprintk("RPC: %s: page %d" | 811 | dprintk("RPC: %s: page %d" |
787 | " srcp 0x%p len %d curlen %d\n", | 812 | " srcp 0x%p len %d curlen %d\n", |
788 | __func__, i, srcp, copy_len, curlen); | 813 | __func__, i, srcp, copy_len, curlen); |
@@ -792,39 +817,32 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
792 | kunmap_atomic(destp); | 817 | kunmap_atomic(destp); |
793 | srcp += curlen; | 818 | srcp += curlen; |
794 | copy_len -= curlen; | 819 | copy_len -= curlen; |
795 | if (copy_len == 0) | 820 | fixup_copy_count += curlen; |
821 | pagelist_len -= curlen; | ||
822 | if (!pagelist_len) | ||
796 | break; | 823 | break; |
797 | page_base = 0; | 824 | page_base = 0; |
798 | } | 825 | } |
799 | } | ||
800 | 826 | ||
801 | if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { | 827 | /* Implicit padding for the last segment in a Write |
802 | curlen = copy_len; | 828 | * chunk is inserted inline at the front of the tail |
803 | if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) | 829 | * iovec. The upper layer ignores the content of |
804 | curlen = rqst->rq_rcv_buf.tail[0].iov_len; | 830 | * the pad. Simply ensure inline content in the tail |
805 | if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) | 831 | * that follows the Write chunk is properly aligned. |
806 | memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); | 832 | */ |
807 | dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", | 833 | if (pad) |
808 | __func__, srcp, copy_len, curlen); | 834 | srcp -= pad; |
809 | rqst->rq_rcv_buf.tail[0].iov_len = curlen; | ||
810 | copy_len -= curlen; ++i; | ||
811 | } else | ||
812 | rqst->rq_rcv_buf.tail[0].iov_len = 0; | ||
813 | |||
814 | if (pad) { | ||
815 | /* implicit padding on terminal chunk */ | ||
816 | unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; | ||
817 | while (pad--) | ||
818 | p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; | ||
819 | } | 835 | } |
820 | 836 | ||
821 | if (copy_len) | 837 | /* The tail iovec is redirected to the remaining data |
822 | dprintk("RPC: %s: %d bytes in" | 838 | * in the receive buffer, to avoid a memcopy. |
823 | " %d extra segments (%d lost)\n", | 839 | */ |
824 | __func__, olen, i, copy_len); | 840 | if (copy_len || pad) { |
841 | rqst->rq_rcv_buf.tail[0].iov_base = srcp; | ||
842 | rqst->rq_private_buf.tail[0].iov_base = srcp; | ||
843 | } | ||
825 | 844 | ||
826 | /* TBD avoid a warning from call_decode() */ | 845 | return fixup_copy_count; |
827 | rqst->rq_private_buf = rqst->rq_rcv_buf; | ||
828 | } | 846 | } |
829 | 847 | ||
830 | void | 848 | void |
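The rewritten rpcrdma_inline_fixup() above is mostly zero-copy: the head and tail iovecs are re-pointed at data already sitting in the receive buffer, and only the page-list portion is actually copied (that copy is what the returned fixup_copy_count measures). A minimal userspace illustration of redirecting an iovec instead of copying, with a local kvec type and invented buffer contents:

    #include <stdio.h>

    struct kvec {
        void  *iov_base;
        size_t iov_len;
    };

    int main(void)
    {
        /* Pretend this is the reply sitting in the DMA-mapped receive buffer. */
        static char recv_buf[] = "HEADERpayload-bytes";
        struct kvec head = { NULL, 6 };    /* lengths were set up by the caller */
        struct kvec tail = { NULL, 13 };

        /* Redirect instead of memcpy: just point into the receive buffer. */
        head.iov_base = recv_buf;
        tail.iov_base = recv_buf + head.iov_len;

        printf("head: %.*s\n", (int)head.iov_len, (char *)head.iov_base);
        printf("tail: %.*s\n", (int)tail.iov_len, (char *)tail.iov_base);
        printf("bytes copied: 0\n");
        return 0;
    }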
@@ -960,14 +978,13 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
960 | (headerp->rm_body.rm_chunks[1] == xdr_zero && | 978 | (headerp->rm_body.rm_chunks[1] == xdr_zero && |
961 | headerp->rm_body.rm_chunks[2] != xdr_zero) || | 979 | headerp->rm_body.rm_chunks[2] != xdr_zero) || |
962 | (headerp->rm_body.rm_chunks[1] != xdr_zero && | 980 | (headerp->rm_body.rm_chunks[1] != xdr_zero && |
963 | req->rl_nchunks == 0)) | 981 | list_empty(&req->rl_registered))) |
964 | goto badheader; | 982 | goto badheader; |
965 | if (headerp->rm_body.rm_chunks[1] != xdr_zero) { | 983 | if (headerp->rm_body.rm_chunks[1] != xdr_zero) { |
966 | /* count any expected write chunks in read reply */ | 984 | /* count any expected write chunks in read reply */ |
967 | /* start at write chunk array count */ | 985 | /* start at write chunk array count */ |
968 | iptr = &headerp->rm_body.rm_chunks[2]; | 986 | iptr = &headerp->rm_body.rm_chunks[2]; |
969 | rdmalen = rpcrdma_count_chunks(rep, | 987 | rdmalen = rpcrdma_count_chunks(rep, 1, &iptr); |
970 | req->rl_nchunks, 1, &iptr); | ||
971 | /* check for validity, and no reply chunk after */ | 988 | /* check for validity, and no reply chunk after */ |
972 | if (rdmalen < 0 || *iptr++ != xdr_zero) | 989 | if (rdmalen < 0 || *iptr++ != xdr_zero) |
973 | goto badheader; | 990 | goto badheader; |
@@ -988,8 +1005,10 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
988 | rep->rr_len -= RPCRDMA_HDRLEN_MIN; | 1005 | rep->rr_len -= RPCRDMA_HDRLEN_MIN; |
989 | status = rep->rr_len; | 1006 | status = rep->rr_len; |
990 | } | 1007 | } |
991 | /* Fix up the rpc results for upper layer */ | 1008 | |
992 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); | 1009 | r_xprt->rx_stats.fixup_copy_count += |
1010 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, | ||
1011 | rdmalen); | ||
993 | break; | 1012 | break; |
994 | 1013 | ||
995 | case rdma_nomsg: | 1014 | case rdma_nomsg: |
@@ -997,11 +1016,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
997 | if (headerp->rm_body.rm_chunks[0] != xdr_zero || | 1016 | if (headerp->rm_body.rm_chunks[0] != xdr_zero || |
998 | headerp->rm_body.rm_chunks[1] != xdr_zero || | 1017 | headerp->rm_body.rm_chunks[1] != xdr_zero || |
999 | headerp->rm_body.rm_chunks[2] != xdr_one || | 1018 | headerp->rm_body.rm_chunks[2] != xdr_one || |
1000 | req->rl_nchunks == 0) | 1019 | list_empty(&req->rl_registered)) |
1001 | goto badheader; | 1020 | goto badheader; |
1002 | iptr = (__be32 *)((unsigned char *)headerp + | 1021 | iptr = (__be32 *)((unsigned char *)headerp + |
1003 | RPCRDMA_HDRLEN_MIN); | 1022 | RPCRDMA_HDRLEN_MIN); |
1004 | rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); | 1023 | rdmalen = rpcrdma_count_chunks(rep, 0, &iptr); |
1005 | if (rdmalen < 0) | 1024 | if (rdmalen < 0) |
1006 | goto badheader; | 1025 | goto badheader; |
1007 | r_xprt->rx_stats.total_rdma_reply += rdmalen; | 1026 | r_xprt->rx_stats.total_rdma_reply += rdmalen; |
@@ -1014,14 +1033,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
1014 | 1033 | ||
1015 | badheader: | 1034 | badheader: |
1016 | default: | 1035 | default: |
1017 | dprintk("%s: invalid rpcrdma reply header (type %d):" | 1036 | dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", |
1018 | " chunks[012] == %d %d %d" | 1037 | rqst->rq_task->tk_pid, __func__, |
1019 | " expected chunks <= %d\n", | 1038 | be32_to_cpu(headerp->rm_type)); |
1020 | __func__, be32_to_cpu(headerp->rm_type), | ||
1021 | headerp->rm_body.rm_chunks[0], | ||
1022 | headerp->rm_body.rm_chunks[1], | ||
1023 | headerp->rm_body.rm_chunks[2], | ||
1024 | req->rl_nchunks); | ||
1025 | status = -EIO; | 1039 | status = -EIO; |
1026 | r_xprt->rx_stats.bad_reply_count++; | 1040 | r_xprt->rx_stats.bad_reply_count++; |
1027 | break; | 1041 | break; |
@@ -1035,7 +1049,7 @@ out: | |||
1035 | * control: waking the next RPC waits until this RPC has | 1049 | * control: waking the next RPC waits until this RPC has |
1036 | * relinquished all its Send Queue entries. | 1050 | * relinquished all its Send Queue entries. |
1037 | */ | 1051 | */ |
1038 | if (req->rl_nchunks) | 1052 | if (!list_empty(&req->rl_registered)) |
1039 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); | 1053 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); |
1040 | 1054 | ||
1041 | spin_lock_bh(&xprt->transport_lock); | 1055 | spin_lock_bh(&xprt->transport_lock); |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 99d2e5b72726..81f0e879f019 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -558,7 +558,6 @@ out_sendbuf: | |||
558 | 558 | ||
559 | out_fail: | 559 | out_fail: |
560 | rpcrdma_buffer_put(req); | 560 | rpcrdma_buffer_put(req); |
561 | r_xprt->rx_stats.failed_marshal_count++; | ||
562 | return NULL; | 561 | return NULL; |
563 | } | 562 | } |
564 | 563 | ||
@@ -590,8 +589,19 @@ xprt_rdma_free(void *buffer) | |||
590 | rpcrdma_buffer_put(req); | 589 | rpcrdma_buffer_put(req); |
591 | } | 590 | } |
592 | 591 | ||
593 | /* | 592 | /** |
593 | * xprt_rdma_send_request - marshal and send an RPC request | ||
594 | * @task: RPC task with an RPC message in rq_snd_buf | ||
595 | * | ||
596 | * Return values: | ||
597 | * 0: The request has been sent | ||
598 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
599 | * ENOBUFS: Call again later to send the request | ||
600 | * EIO: A permanent error occurred. The request was not sent, | ||
601 | * and don't try it again | ||
602 | * | ||
594 | * send_request invokes the meat of RPC RDMA. It must do the following: | 603 | * send_request invokes the meat of RPC RDMA. It must do the following: |
604 | * | ||
595 | * 1. Marshal the RPC request into an RPC RDMA request, which means | 605 | * 1. Marshal the RPC request into an RPC RDMA request, which means |
596 | * putting a header in front of data, and creating IOVs for RDMA | 606 | * putting a header in front of data, and creating IOVs for RDMA |
597 | * from those in the request. | 607 | * from those in the request. |
@@ -600,7 +610,6 @@ xprt_rdma_free(void *buffer) | |||
600 | * the request (rpcrdma_ep_post). | 610 | * the request (rpcrdma_ep_post). |
601 | * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). | 611 | * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). |
602 | */ | 612 | */ |
603 | |||
604 | static int | 613 | static int |
605 | xprt_rdma_send_request(struct rpc_task *task) | 614 | xprt_rdma_send_request(struct rpc_task *task) |
606 | { | 615 | { |
@@ -610,6 +619,9 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
610 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 619 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
611 | int rc = 0; | 620 | int rc = 0; |
612 | 621 | ||
622 | /* On retransmit, remove any previously registered chunks */ | ||
623 | r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); | ||
624 | |||
613 | rc = rpcrdma_marshal_req(rqst); | 625 | rc = rpcrdma_marshal_req(rqst); |
614 | if (rc < 0) | 626 | if (rc < 0) |
615 | goto failed_marshal; | 627 | goto failed_marshal; |
@@ -630,11 +642,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
630 | return 0; | 642 | return 0; |
631 | 643 | ||
632 | failed_marshal: | 644 | failed_marshal: |
633 | r_xprt->rx_stats.failed_marshal_count++; | ||
634 | dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", | 645 | dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", |
635 | __func__, rc); | 646 | __func__, rc); |
636 | if (rc == -EIO) | 647 | if (rc == -EIO) |
637 | return -EIO; | 648 | r_xprt->rx_stats.failed_marshal_count++; |
649 | if (rc != -ENOTCONN) | ||
650 | return rc; | ||
638 | drop_connection: | 651 | drop_connection: |
639 | xprt_disconnect_done(xprt); | 652 | xprt_disconnect_done(xprt); |
640 | return -ENOTCONN; /* implies disconnect */ | 653 | return -ENOTCONN; /* implies disconnect */ |
@@ -660,7 +673,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
660 | xprt->stat.bad_xids, | 673 | xprt->stat.bad_xids, |
661 | xprt->stat.req_u, | 674 | xprt->stat.req_u, |
662 | xprt->stat.bklog_u); | 675 | xprt->stat.bklog_u); |
663 | seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu\n", | 676 | seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ", |
664 | r_xprt->rx_stats.read_chunk_count, | 677 | r_xprt->rx_stats.read_chunk_count, |
665 | r_xprt->rx_stats.write_chunk_count, | 678 | r_xprt->rx_stats.write_chunk_count, |
666 | r_xprt->rx_stats.reply_chunk_count, | 679 | r_xprt->rx_stats.reply_chunk_count, |
@@ -672,6 +685,10 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
672 | r_xprt->rx_stats.failed_marshal_count, | 685 | r_xprt->rx_stats.failed_marshal_count, |
673 | r_xprt->rx_stats.bad_reply_count, | 686 | r_xprt->rx_stats.bad_reply_count, |
674 | r_xprt->rx_stats.nomsg_call_count); | 687 | r_xprt->rx_stats.nomsg_call_count); |
688 | seq_printf(seq, "%lu %lu %lu\n", | ||
689 | r_xprt->rx_stats.mrs_recovered, | ||
690 | r_xprt->rx_stats.mrs_orphaned, | ||
691 | r_xprt->rx_stats.mrs_allocated); | ||
675 | } | 692 | } |
676 | 693 | ||
677 | static int | 694 | static int |
@@ -741,7 +758,6 @@ void xprt_rdma_cleanup(void) | |||
741 | __func__, rc); | 758 | __func__, rc); |
742 | 759 | ||
743 | rpcrdma_destroy_wq(); | 760 | rpcrdma_destroy_wq(); |
744 | frwr_destroy_recovery_wq(); | ||
745 | 761 | ||
746 | rc = xprt_unregister_transport(&xprt_rdma_bc); | 762 | rc = xprt_unregister_transport(&xprt_rdma_bc); |
747 | if (rc) | 763 | if (rc) |
@@ -753,20 +769,13 @@ int xprt_rdma_init(void) | |||
753 | { | 769 | { |
754 | int rc; | 770 | int rc; |
755 | 771 | ||
756 | rc = frwr_alloc_recovery_wq(); | ||
757 | if (rc) | ||
758 | return rc; | ||
759 | |||
760 | rc = rpcrdma_alloc_wq(); | 772 | rc = rpcrdma_alloc_wq(); |
761 | if (rc) { | 773 | if (rc) |
762 | frwr_destroy_recovery_wq(); | ||
763 | return rc; | 774 | return rc; |
764 | } | ||
765 | 775 | ||
766 | rc = xprt_register_transport(&xprt_rdma); | 776 | rc = xprt_register_transport(&xprt_rdma); |
767 | if (rc) { | 777 | if (rc) { |
768 | rpcrdma_destroy_wq(); | 778 | rpcrdma_destroy_wq(); |
769 | frwr_destroy_recovery_wq(); | ||
770 | return rc; | 779 | return rc; |
771 | } | 780 | } |
772 | 781 | ||
@@ -774,7 +783,6 @@ int xprt_rdma_init(void) | |||
774 | if (rc) { | 783 | if (rc) { |
775 | xprt_unregister_transport(&xprt_rdma); | 784 | xprt_unregister_transport(&xprt_rdma); |
776 | rpcrdma_destroy_wq(); | 785 | rpcrdma_destroy_wq(); |
777 | frwr_destroy_recovery_wq(); | ||
778 | return rc; | 786 | return rc; |
779 | } | 787 | } |
780 | 788 | ||
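The kernel-doc added to xprt_rdma_send_request() spells out the contract with the RPC layer: 0 means the request went out, -ENOTCONN asks the caller to reconnect and call again, -ENOBUFS means try again later, and -EIO is permanent. A standalone sketch of a caller honoring that contract; the send_request() stub is invented, the errno values are the standard ones:

    #include <errno.h>
    #include <stdio.h>

    /* Stand-in for the transport's send path: fail once, then succeed. */
    static int send_request(int attempt)
    {
        return attempt == 0 ? -ENOTCONN : 0;
    }

    int main(void)
    {
        for (int attempt = 0; attempt < 3; attempt++) {
            int rc = send_request(attempt);

            switch (rc) {
            case 0:
                printf("request sent\n");
                return 0;
            case -ENOTCONN:
                printf("reconnect, then call again\n");
                continue;
            case -ENOBUFS:
                printf("back off and retry later\n");
                continue;
            case -EIO:
            default:
                printf("permanent error, do not retry\n");
                return 1;
            }
        }
        return 1;
    }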
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b044d98a1370..536d0be3f61b 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -379,8 +379,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
379 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 379 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
380 | int rc; | 380 | int rc; |
381 | 381 | ||
382 | ia->ri_dma_mr = NULL; | ||
383 | |||
384 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 382 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); |
385 | if (IS_ERR(ia->ri_id)) { | 383 | if (IS_ERR(ia->ri_id)) { |
386 | rc = PTR_ERR(ia->ri_id); | 384 | rc = PTR_ERR(ia->ri_id); |
@@ -391,47 +389,29 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
391 | ia->ri_pd = ib_alloc_pd(ia->ri_device); | 389 | ia->ri_pd = ib_alloc_pd(ia->ri_device); |
392 | if (IS_ERR(ia->ri_pd)) { | 390 | if (IS_ERR(ia->ri_pd)) { |
393 | rc = PTR_ERR(ia->ri_pd); | 391 | rc = PTR_ERR(ia->ri_pd); |
394 | dprintk("RPC: %s: ib_alloc_pd() failed %i\n", | 392 | pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); |
395 | __func__, rc); | ||
396 | goto out2; | 393 | goto out2; |
397 | } | 394 | } |
398 | 395 | ||
399 | if (memreg == RPCRDMA_FRMR) { | ||
400 | if (!(ia->ri_device->attrs.device_cap_flags & | ||
401 | IB_DEVICE_MEM_MGT_EXTENSIONS) || | ||
402 | (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) { | ||
403 | dprintk("RPC: %s: FRMR registration " | ||
404 | "not supported by HCA\n", __func__); | ||
405 | memreg = RPCRDMA_MTHCAFMR; | ||
406 | } | ||
407 | } | ||
408 | if (memreg == RPCRDMA_MTHCAFMR) { | ||
409 | if (!ia->ri_device->alloc_fmr) { | ||
410 | dprintk("RPC: %s: MTHCAFMR registration " | ||
411 | "not supported by HCA\n", __func__); | ||
412 | rc = -EINVAL; | ||
413 | goto out3; | ||
414 | } | ||
415 | } | ||
416 | |||
417 | switch (memreg) { | 396 | switch (memreg) { |
418 | case RPCRDMA_FRMR: | 397 | case RPCRDMA_FRMR: |
419 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | 398 | if (frwr_is_supported(ia)) { |
420 | break; | 399 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; |
421 | case RPCRDMA_ALLPHYSICAL: | 400 | break; |
422 | ia->ri_ops = &rpcrdma_physical_memreg_ops; | 401 | } |
423 | break; | 402 | /*FALLTHROUGH*/ |
424 | case RPCRDMA_MTHCAFMR: | 403 | case RPCRDMA_MTHCAFMR: |
425 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | 404 | if (fmr_is_supported(ia)) { |
426 | break; | 405 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; |
406 | break; | ||
407 | } | ||
408 | /*FALLTHROUGH*/ | ||
427 | default: | 409 | default: |
428 | printk(KERN_ERR "RPC: Unsupported memory " | 410 | pr_err("rpcrdma: Unsupported memory registration mode: %d\n", |
429 | "registration mode: %d\n", memreg); | 411 | memreg); |
430 | rc = -ENOMEM; | 412 | rc = -EINVAL; |
431 | goto out3; | 413 | goto out3; |
432 | } | 414 | } |
433 | dprintk("RPC: %s: memory registration strategy is '%s'\n", | ||
434 | __func__, ia->ri_ops->ro_displayname); | ||
435 | 415 | ||
436 | return 0; | 416 | return 0; |
437 | 417 | ||
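The reworked switch in rpcrdma_ia_open() probes the device at each case and deliberately falls through to the next weaker mode: FRWR is preferred, FMR is the fallback, and with ALLPHYSICAL gone anything else now fails with -EINVAL instead of -ENOMEM. The same probe-and-fall-through shape in a standalone sketch, with faked capability checks:

    #include <stdbool.h>
    #include <stdio.h>

    enum memreg { MR_FRMR, MR_FMR };

    static bool frwr_is_supported(void) { return false; }  /* pretend the HCA lacks FRWR */
    static bool fmr_is_supported(void)  { return true; }

    static const char *pick_strategy(enum memreg requested)
    {
        switch (requested) {
        case MR_FRMR:
            if (frwr_is_supported())
                return "frwr";
            /* fall through */
        case MR_FMR:
            if (fmr_is_supported())
                return "fmr";
            /* fall through */
        default:
            return NULL;    /* the kernel code returns -EINVAL here */
        }
    }

    int main(void)
    {
        const char *name = pick_strategy(MR_FRMR);

        if (name)
            printf("memory registration strategy: %s\n", name);
        else
            printf("unsupported memory registration mode\n");
        return 0;
    }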
@@ -585,8 +565,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
585 | out2: | 565 | out2: |
586 | ib_free_cq(sendcq); | 566 | ib_free_cq(sendcq); |
587 | out1: | 567 | out1: |
588 | if (ia->ri_dma_mr) | ||
589 | ib_dereg_mr(ia->ri_dma_mr); | ||
590 | return rc; | 568 | return rc; |
591 | } | 569 | } |
592 | 570 | ||
@@ -600,8 +578,6 @@ out1: | |||
600 | void | 578 | void |
601 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 579 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
602 | { | 580 | { |
603 | int rc; | ||
604 | |||
605 | dprintk("RPC: %s: entering, connected is %d\n", | 581 | dprintk("RPC: %s: entering, connected is %d\n", |
606 | __func__, ep->rep_connected); | 582 | __func__, ep->rep_connected); |
607 | 583 | ||
@@ -615,12 +591,6 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
615 | 591 | ||
616 | ib_free_cq(ep->rep_attr.recv_cq); | 592 | ib_free_cq(ep->rep_attr.recv_cq); |
617 | ib_free_cq(ep->rep_attr.send_cq); | 593 | ib_free_cq(ep->rep_attr.send_cq); |
618 | |||
619 | if (ia->ri_dma_mr) { | ||
620 | rc = ib_dereg_mr(ia->ri_dma_mr); | ||
621 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | ||
622 | __func__, rc); | ||
623 | } | ||
624 | } | 594 | } |
625 | 595 | ||
626 | /* | 596 | /* |
@@ -777,6 +747,90 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
777 | ib_drain_qp(ia->ri_id->qp); | 747 | ib_drain_qp(ia->ri_id->qp); |
778 | } | 748 | } |
779 | 749 | ||
750 | static void | ||
751 | rpcrdma_mr_recovery_worker(struct work_struct *work) | ||
752 | { | ||
753 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, | ||
754 | rb_recovery_worker.work); | ||
755 | struct rpcrdma_mw *mw; | ||
756 | |||
757 | spin_lock(&buf->rb_recovery_lock); | ||
758 | while (!list_empty(&buf->rb_stale_mrs)) { | ||
759 | mw = list_first_entry(&buf->rb_stale_mrs, | ||
760 | struct rpcrdma_mw, mw_list); | ||
761 | list_del_init(&mw->mw_list); | ||
762 | spin_unlock(&buf->rb_recovery_lock); | ||
763 | |||
764 | dprintk("RPC: %s: recovering MR %p\n", __func__, mw); | ||
765 | mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); | ||
766 | |||
767 | spin_lock(&buf->rb_recovery_lock); | ||
768 | } | ||
769 | spin_unlock(&buf->rb_recovery_lock); | ||
770 | } | ||
771 | |||
772 | void | ||
773 | rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) | ||
774 | { | ||
775 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | ||
776 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
777 | |||
778 | spin_lock(&buf->rb_recovery_lock); | ||
779 | list_add(&mw->mw_list, &buf->rb_stale_mrs); | ||
780 | spin_unlock(&buf->rb_recovery_lock); | ||
781 | |||
782 | schedule_delayed_work(&buf->rb_recovery_worker, 0); | ||
783 | } | ||
784 | |||
785 | static void | ||
786 | rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) | ||
787 | { | ||
788 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
789 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
790 | unsigned int count; | ||
791 | LIST_HEAD(free); | ||
792 | LIST_HEAD(all); | ||
793 | |||
794 | for (count = 0; count < 32; count++) { | ||
795 | struct rpcrdma_mw *mw; | ||
796 | int rc; | ||
797 | |||
798 | mw = kzalloc(sizeof(*mw), GFP_KERNEL); | ||
799 | if (!mw) | ||
800 | break; | ||
801 | |||
802 | rc = ia->ri_ops->ro_init_mr(ia, mw); | ||
803 | if (rc) { | ||
804 | kfree(mw); | ||
805 | break; | ||
806 | } | ||
807 | |||
808 | mw->mw_xprt = r_xprt; | ||
809 | |||
810 | list_add(&mw->mw_list, &free); | ||
811 | list_add(&mw->mw_all, &all); | ||
812 | } | ||
813 | |||
814 | spin_lock(&buf->rb_mwlock); | ||
815 | list_splice(&free, &buf->rb_mws); | ||
816 | list_splice(&all, &buf->rb_all); | ||
817 | r_xprt->rx_stats.mrs_allocated += count; | ||
818 | spin_unlock(&buf->rb_mwlock); | ||
819 | |||
820 | dprintk("RPC: %s: created %u MRs\n", __func__, count); | ||
821 | } | ||
822 | |||
823 | static void | ||
824 | rpcrdma_mr_refresh_worker(struct work_struct *work) | ||
825 | { | ||
826 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, | ||
827 | rb_refresh_worker.work); | ||
828 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | ||
829 | rx_buf); | ||
830 | |||
831 | rpcrdma_create_mrs(r_xprt); | ||
832 | } | ||
833 | |||
780 | struct rpcrdma_req * | 834 | struct rpcrdma_req * |
781 | rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) | 835 | rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) |
782 | { | 836 | { |
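rpcrdma_defer_mr_recovery() and rpcrdma_mr_recovery_worker() above follow a common deferral shape: the fast path only parks the stale MW on a list guarded by rb_recovery_lock and schedules delayed work, while the worker drains that list one entry at a time, dropping the lock around the slow ro_recover_mr() call. A single-threaded userspace sketch of that drain loop; the lock()/unlock() stubs only mark where the kernel takes and releases the spinlock:

    #include <stdio.h>
    #include <stdlib.h>

    struct mw {
        struct mw *next;
        int id;
    };

    static struct mw *stale_mrs;    /* guarded by the "recovery lock" */

    static void lock(void)   { /* spin_lock(&buf->rb_recovery_lock) */ }
    static void unlock(void) { /* spin_unlock(&buf->rb_recovery_lock) */ }

    static void defer_mr_recovery(int id)
    {
        struct mw *mw = malloc(sizeof(*mw));

        mw->id = id;
        lock();
        mw->next = stale_mrs;       /* list_add() onto rb_stale_mrs */
        stale_mrs = mw;
        unlock();
        /* schedule_delayed_work(&buf->rb_recovery_worker, 0) goes here */
    }

    static void mr_recovery_worker(void)
    {
        lock();
        while (stale_mrs) {
            struct mw *mw = stale_mrs;

            stale_mrs = mw->next;   /* list_del_init() */
            unlock();               /* drop the lock around the slow part */

            printf("recovering MR %d\n", mw->id);
            free(mw);

            lock();
        }
        unlock();
    }

    int main(void)
    {
        defer_mr_recovery(1);
        defer_mr_recovery(2);
        mr_recovery_worker();
        return 0;
    }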
@@ -793,6 +847,7 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) | |||
793 | spin_unlock(&buffer->rb_reqslock); | 847 | spin_unlock(&buffer->rb_reqslock); |
794 | req->rl_cqe.done = rpcrdma_wc_send; | 848 | req->rl_cqe.done = rpcrdma_wc_send; |
795 | req->rl_buffer = &r_xprt->rx_buf; | 849 | req->rl_buffer = &r_xprt->rx_buf; |
850 | INIT_LIST_HEAD(&req->rl_registered); | ||
796 | return req; | 851 | return req; |
797 | } | 852 | } |
798 | 853 | ||
@@ -832,17 +887,23 @@ int | |||
832 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | 887 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
833 | { | 888 | { |
834 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 889 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
835 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
836 | int i, rc; | 890 | int i, rc; |
837 | 891 | ||
838 | buf->rb_max_requests = r_xprt->rx_data.max_requests; | 892 | buf->rb_max_requests = r_xprt->rx_data.max_requests; |
839 | buf->rb_bc_srv_max_requests = 0; | 893 | buf->rb_bc_srv_max_requests = 0; |
840 | spin_lock_init(&buf->rb_lock); | ||
841 | atomic_set(&buf->rb_credits, 1); | 894 | atomic_set(&buf->rb_credits, 1); |
895 | spin_lock_init(&buf->rb_mwlock); | ||
896 | spin_lock_init(&buf->rb_lock); | ||
897 | spin_lock_init(&buf->rb_recovery_lock); | ||
898 | INIT_LIST_HEAD(&buf->rb_mws); | ||
899 | INIT_LIST_HEAD(&buf->rb_all); | ||
900 | INIT_LIST_HEAD(&buf->rb_stale_mrs); | ||
901 | INIT_DELAYED_WORK(&buf->rb_refresh_worker, | ||
902 | rpcrdma_mr_refresh_worker); | ||
903 | INIT_DELAYED_WORK(&buf->rb_recovery_worker, | ||
904 | rpcrdma_mr_recovery_worker); | ||
842 | 905 | ||
843 | rc = ia->ri_ops->ro_init(r_xprt); | 906 | rpcrdma_create_mrs(r_xprt); |
844 | if (rc) | ||
845 | goto out; | ||
846 | 907 | ||
847 | INIT_LIST_HEAD(&buf->rb_send_bufs); | 908 | INIT_LIST_HEAD(&buf->rb_send_bufs); |
848 | INIT_LIST_HEAD(&buf->rb_allreqs); | 909 | INIT_LIST_HEAD(&buf->rb_allreqs); |
@@ -862,7 +923,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
862 | } | 923 | } |
863 | 924 | ||
864 | INIT_LIST_HEAD(&buf->rb_recv_bufs); | 925 | INIT_LIST_HEAD(&buf->rb_recv_bufs); |
865 | for (i = 0; i < buf->rb_max_requests + 2; i++) { | 926 | for (i = 0; i < buf->rb_max_requests; i++) { |
866 | struct rpcrdma_rep *rep; | 927 | struct rpcrdma_rep *rep; |
867 | 928 | ||
868 | rep = rpcrdma_create_rep(r_xprt); | 929 | rep = rpcrdma_create_rep(r_xprt); |
@@ -918,11 +979,39 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
918 | kfree(req); | 979 | kfree(req); |
919 | } | 980 | } |
920 | 981 | ||
982 | static void | ||
983 | rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) | ||
984 | { | ||
985 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | ||
986 | rx_buf); | ||
987 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | ||
988 | struct rpcrdma_mw *mw; | ||
989 | unsigned int count; | ||
990 | |||
991 | count = 0; | ||
992 | spin_lock(&buf->rb_mwlock); | ||
993 | while (!list_empty(&buf->rb_all)) { | ||
994 | mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
995 | list_del(&mw->mw_all); | ||
996 | |||
997 | spin_unlock(&buf->rb_mwlock); | ||
998 | ia->ri_ops->ro_release_mr(mw); | ||
999 | count++; | ||
1000 | spin_lock(&buf->rb_mwlock); | ||
1001 | } | ||
1002 | spin_unlock(&buf->rb_mwlock); | ||
1003 | r_xprt->rx_stats.mrs_allocated = 0; | ||
1004 | |||
1005 | dprintk("RPC: %s: released %u MRs\n", __func__, count); | ||
1006 | } | ||
1007 | |||
921 | void | 1008 | void |
922 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1009 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
923 | { | 1010 | { |
924 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1011 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
925 | 1012 | ||
1013 | cancel_delayed_work_sync(&buf->rb_recovery_worker); | ||
1014 | |||
926 | while (!list_empty(&buf->rb_recv_bufs)) { | 1015 | while (!list_empty(&buf->rb_recv_bufs)) { |
927 | struct rpcrdma_rep *rep; | 1016 | struct rpcrdma_rep *rep; |
928 | 1017 | ||
@@ -944,7 +1033,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
944 | } | 1033 | } |
945 | spin_unlock(&buf->rb_reqslock); | 1034 | spin_unlock(&buf->rb_reqslock); |
946 | 1035 | ||
947 | ia->ri_ops->ro_destroy(buf); | 1036 | rpcrdma_destroy_mrs(buf); |
948 | } | 1037 | } |
949 | 1038 | ||
950 | struct rpcrdma_mw * | 1039 | struct rpcrdma_mw * |
@@ -962,8 +1051,17 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) | |||
962 | spin_unlock(&buf->rb_mwlock); | 1051 | spin_unlock(&buf->rb_mwlock); |
963 | 1052 | ||
964 | if (!mw) | 1053 | if (!mw) |
965 | pr_err("RPC: %s: no MWs available\n", __func__); | 1054 | goto out_nomws; |
966 | return mw; | 1055 | return mw; |
1056 | |||
1057 | out_nomws: | ||
1058 | dprintk("RPC: %s: no MWs available\n", __func__); | ||
1059 | schedule_delayed_work(&buf->rb_refresh_worker, 0); | ||
1060 | |||
1061 | /* Allow the reply handler and refresh worker to run */ | ||
1062 | cond_resched(); | ||
1063 | |||
1064 | return NULL; | ||
967 | } | 1065 | } |
968 | 1066 | ||
969 | void | 1067 | void |
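With the out_nomws path, running out of MWs is no longer just logged: the refresh worker is kicked and the caller sees NULL. A hedged sketch of how a registration path might consume this; the function name is a placeholder, the real callers being the ->ro_map implementations.

/* Hedged sketch, not quoted from fmr_ops.c/frwr_ops.c. */
static int
example_register_segment(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mw *mw;

	mw = rpcrdma_get_mw(r_xprt);
	if (!mw)
		return -ENOBUFS;	/* pool empty; refresh worker already scheduled */

	/* ... map and register the segment with mw, then track it on
	 * req->rl_registered so the unmap path can find it later ...
	 */
	return 0;
}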
@@ -978,8 +1076,6 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) | |||
978 | 1076 | ||
979 | /* | 1077 | /* |
980 | * Get a set of request/reply buffers. | 1078 | * Get a set of request/reply buffers. |
981 | * | ||
982 | * Reply buffer (if available) is attached to send buffer upon return. | ||
983 | */ | 1079 | */ |
984 | struct rpcrdma_req * | 1080 | struct rpcrdma_req * |
985 | rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | 1081 | rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) |
@@ -998,13 +1094,13 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
998 | 1094 | ||
999 | out_reqbuf: | 1095 | out_reqbuf: |
1000 | spin_unlock(&buffers->rb_lock); | 1096 | spin_unlock(&buffers->rb_lock); |
1001 | pr_warn("RPC: %s: out of request buffers\n", __func__); | 1097 | pr_warn("rpcrdma: out of request buffers (%p)\n", buffers); |
1002 | return NULL; | 1098 | return NULL; |
1003 | out_repbuf: | 1099 | out_repbuf: |
1100 | list_add(&req->rl_free, &buffers->rb_send_bufs); | ||
1004 | spin_unlock(&buffers->rb_lock); | 1101 | spin_unlock(&buffers->rb_lock); |
1005 | pr_warn("RPC: %s: out of reply buffers\n", __func__); | 1102 | pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers); |
1006 | req->rl_reply = NULL; | 1103 | return NULL; |
1007 | return req; | ||
1008 | } | 1104 | } |
1009 | 1105 | ||
1010 | /* | 1106 | /* |
@@ -1060,14 +1156,6 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
1060 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. | 1156 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. |
1061 | */ | 1157 | */ |
1062 | 1158 | ||
1063 | void | ||
1064 | rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) | ||
1065 | { | ||
1066 | dprintk("RPC: map_one: offset %p iova %llx len %zu\n", | ||
1067 | seg->mr_offset, | ||
1068 | (unsigned long long)seg->mr_dma, seg->mr_dmalen); | ||
1069 | } | ||
1070 | |||
1071 | /** | 1159 | /** |
1072 | * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers | 1160 | * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers |
1073 | * @ia: controlling rpcrdma_ia | 1161 | * @ia: controlling rpcrdma_ia |
@@ -1150,7 +1238,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1150 | if (rep) { | 1238 | if (rep) { |
1151 | rc = rpcrdma_ep_post_recv(ia, ep, rep); | 1239 | rc = rpcrdma_ep_post_recv(ia, ep, rep); |
1152 | if (rc) | 1240 | if (rc) |
1153 | goto out; | 1241 | return rc; |
1154 | req->rl_reply = NULL; | 1242 | req->rl_reply = NULL; |
1155 | } | 1243 | } |
1156 | 1244 | ||
@@ -1175,10 +1263,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1175 | 1263 | ||
1176 | rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); | 1264 | rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); |
1177 | if (rc) | 1265 | if (rc) |
1178 | dprintk("RPC: %s: ib_post_send returned %i\n", __func__, | 1266 | goto out_postsend_err; |
1179 | rc); | 1267 | return 0; |
1180 | out: | 1268 | |
1181 | return rc; | 1269 | out_postsend_err: |
1270 | pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc); | ||
1271 | return -ENOTCONN; | ||
1182 | } | 1272 | } |
1183 | 1273 | ||
1184 | /* | 1274 | /* |
@@ -1203,11 +1293,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
1203 | DMA_BIDIRECTIONAL); | 1293 | DMA_BIDIRECTIONAL); |
1204 | 1294 | ||
1205 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); | 1295 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); |
1206 | |||
1207 | if (rc) | 1296 | if (rc) |
1208 | dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, | 1297 | goto out_postrecv; |
1209 | rc); | 1298 | return 0; |
1210 | return rc; | 1299 | |
1300 | out_postrecv: | ||
1301 | pr_err("rpcrdma: ib_post_recv returned %i\n", rc); | ||
1302 | return -ENOTCONN; | ||
1211 | } | 1303 | } |
1212 | 1304 | ||
1213 | /** | 1305 | /** |
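Both posting paths above now report failure with pr_err() and fold any provider error into -ENOTCONN rather than passing the raw verbs error back up. A hedged caller-side sketch follows; the label and surrounding context are placeholders, not quoted from transport.c.

	/* Sketch: a send-path call site reacting to the uniform error code */
	rc = rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req);
	if (rc)
		goto drop_connection;	/* e.g. xprt_disconnect_done(xprt) */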
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 95cdc66225ee..670fad57153a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -68,7 +68,6 @@ struct rpcrdma_ia { | |||
68 | struct ib_device *ri_device; | 68 | struct ib_device *ri_device; |
69 | struct rdma_cm_id *ri_id; | 69 | struct rdma_cm_id *ri_id; |
70 | struct ib_pd *ri_pd; | 70 | struct ib_pd *ri_pd; |
71 | struct ib_mr *ri_dma_mr; | ||
72 | struct completion ri_done; | 71 | struct completion ri_done; |
73 | int ri_async_rc; | 72 | int ri_async_rc; |
74 | unsigned int ri_max_frmr_depth; | 73 | unsigned int ri_max_frmr_depth; |
@@ -172,23 +171,14 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
172 | * o recv buffer (posted to provider) | 171 | * o recv buffer (posted to provider) |
173 | * o ib_sge (also donated to provider) | 172 | * o ib_sge (also donated to provider) |
174 | * o status of reply (length, success or not) | 173 | * o status of reply (length, success or not) |
175 | * o bookkeeping state to get run by tasklet (list, etc) | 174 | * o bookkeeping state to get run by reply handler (list, etc) |
176 | * | 175 | * |
177 | * These are allocated during initialization, per-transport instance; | 176 | * These are allocated during initialization, per-transport instance. |
178 | * however, the tasklet execution list itself is global, as it should | ||
179 | * always be pretty short. | ||
180 | * | 177 | * |
181 | * N of these are associated with a transport instance, and stored in | 178 | * N of these are associated with a transport instance, and stored in |
182 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | 179 | * struct rpcrdma_buffer. N is the max number of outstanding requests. |
183 | */ | 180 | */ |
184 | 181 | ||
185 | #define RPCRDMA_MAX_DATA_SEGS ((1 * 1024 * 1024) / PAGE_SIZE) | ||
186 | |||
187 | /* data segments + head/tail for Call + head/tail for Reply */ | ||
188 | #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 4) | ||
189 | |||
190 | struct rpcrdma_buffer; | ||
191 | |||
192 | struct rpcrdma_rep { | 182 | struct rpcrdma_rep { |
193 | struct ib_cqe rr_cqe; | 183 | struct ib_cqe rr_cqe; |
194 | unsigned int rr_len; | 184 | unsigned int rr_len; |
@@ -221,9 +211,6 @@ enum rpcrdma_frmr_state { | |||
221 | }; | 211 | }; |
222 | 212 | ||
223 | struct rpcrdma_frmr { | 213 | struct rpcrdma_frmr { |
224 | struct scatterlist *fr_sg; | ||
225 | int fr_nents; | ||
226 | enum dma_data_direction fr_dir; | ||
227 | struct ib_mr *fr_mr; | 214 | struct ib_mr *fr_mr; |
228 | struct ib_cqe fr_cqe; | 215 | struct ib_cqe fr_cqe; |
229 | enum rpcrdma_frmr_state fr_state; | 216 | enum rpcrdma_frmr_state fr_state; |
@@ -235,18 +222,23 @@ struct rpcrdma_frmr { | |||
235 | }; | 222 | }; |
236 | 223 | ||
237 | struct rpcrdma_fmr { | 224 | struct rpcrdma_fmr { |
238 | struct ib_fmr *fmr; | 225 | struct ib_fmr *fm_mr; |
239 | u64 *physaddrs; | 226 | u64 *fm_physaddrs; |
240 | }; | 227 | }; |
241 | 228 | ||
242 | struct rpcrdma_mw { | 229 | struct rpcrdma_mw { |
230 | struct list_head mw_list; | ||
231 | struct scatterlist *mw_sg; | ||
232 | int mw_nents; | ||
233 | enum dma_data_direction mw_dir; | ||
243 | union { | 234 | union { |
244 | struct rpcrdma_fmr fmr; | 235 | struct rpcrdma_fmr fmr; |
245 | struct rpcrdma_frmr frmr; | 236 | struct rpcrdma_frmr frmr; |
246 | }; | 237 | }; |
247 | struct work_struct mw_work; | ||
248 | struct rpcrdma_xprt *mw_xprt; | 238 | struct rpcrdma_xprt *mw_xprt; |
249 | struct list_head mw_list; | 239 | u32 mw_handle; |
240 | u32 mw_length; | ||
241 | u64 mw_offset; | ||
250 | struct list_head mw_all; | 242 | struct list_head mw_all; |
251 | }; | 243 | }; |
252 | 244 | ||
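The reshuffled struct moves the scatterlist (mw_sg, mw_nents, mw_dir) and the registration result (mw_handle, mw_length, mw_offset) out of the per-mode unions, so marshaling code can read them without caring whether FMR or FRWR did the work. A hedged fragment showing how an FRWR ->ro_map might publish those fields from its ib_mr, assuming the 4.7-era five-argument ib_map_mr_sg(); this is an illustration, not a quote from frwr_ops.c.

	/* Sketch: 'mr' is the ib_mr behind mw->frmr.fr_mr */
	n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
	if (n != mw->mw_nents)
		return -EIO;		/* placeholder error handling */

	mw->mw_handle = mr->rkey;	/* becomes the RDMA segment's handle */
	mw->mw_length = mr->length;	/* ... its length */
	mw->mw_offset = mr->iova;	/* ... and its offset */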
@@ -266,33 +258,30 @@ struct rpcrdma_mw { | |||
266 | * of iovs for send operations. The reason is that the iovs passed to | 258 | * of iovs for send operations. The reason is that the iovs passed to |
267 | * ib_post_{send,recv} must not be modified until the work request | 259 | * ib_post_{send,recv} must not be modified until the work request |
268 | * completes. | 260 | * completes. |
269 | * | ||
270 | * NOTES: | ||
271 | * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we | ||
272 | * marshal. The number needed varies depending on the iov lists that | ||
273 | * are passed to us, the memory registration mode we are in, and if | ||
274 | * physical addressing is used, the layout. | ||
275 | */ | 261 | */ |
276 | 262 | ||
263 | /* Maximum number of page-sized "segments" per chunk list to be | ||
264 | * registered or invalidated. Must handle a Reply chunk: | ||
265 | */ | ||
266 | enum { | ||
267 | RPCRDMA_MAX_IOV_SEGS = 3, | ||
268 | RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1, | ||
269 | RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS + | ||
270 | RPCRDMA_MAX_IOV_SEGS, | ||
271 | }; | ||
272 | |||
277 | struct rpcrdma_mr_seg { /* chunk descriptors */ | 273 | struct rpcrdma_mr_seg { /* chunk descriptors */ |
278 | struct rpcrdma_mw *rl_mw; /* registered MR */ | ||
279 | u64 mr_base; /* registration result */ | ||
280 | u32 mr_rkey; /* registration result */ | ||
281 | u32 mr_len; /* length of chunk or segment */ | 274 | u32 mr_len; /* length of chunk or segment */ |
282 | int mr_nsegs; /* number of segments in chunk or 0 */ | ||
283 | enum dma_data_direction mr_dir; /* segment mapping direction */ | ||
284 | dma_addr_t mr_dma; /* segment mapping address */ | ||
285 | size_t mr_dmalen; /* segment mapping length */ | ||
286 | struct page *mr_page; /* owning page, if any */ | 275 | struct page *mr_page; /* owning page, if any */ |
287 | char *mr_offset; /* kva if no page, else offset */ | 276 | char *mr_offset; /* kva if no page, else offset */ |
288 | }; | 277 | }; |
289 | 278 | ||
290 | #define RPCRDMA_MAX_IOVS (2) | 279 | #define RPCRDMA_MAX_IOVS (2) |
291 | 280 | ||
281 | struct rpcrdma_buffer; | ||
292 | struct rpcrdma_req { | 282 | struct rpcrdma_req { |
293 | struct list_head rl_free; | 283 | struct list_head rl_free; |
294 | unsigned int rl_niovs; | 284 | unsigned int rl_niovs; |
295 | unsigned int rl_nchunks; | ||
296 | unsigned int rl_connect_cookie; | 285 | unsigned int rl_connect_cookie; |
297 | struct rpc_task *rl_task; | 286 | struct rpc_task *rl_task; |
298 | struct rpcrdma_buffer *rl_buffer; | 287 | struct rpcrdma_buffer *rl_buffer; |
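For reference, with 4 KiB pages the enum above works out as follows; written as a comment since PAGE_SIZE varies by architecture.

/* Worked example, assuming PAGE_SIZE == 4096:
 *   RPCRDMA_MAX_DATA_SEGS = (1 * 1024 * 1024) / 4096 + 1 = 257
 *   RPCRDMA_MAX_SEGS      = 257 + RPCRDMA_MAX_IOV_SEGS
 *                         = 257 + 3 = 260
 * so rl_segments[] in struct rpcrdma_req holds 260 entries on such a
 * configuration; larger pages shrink the array accordingly.
 */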
@@ -300,12 +289,13 @@ struct rpcrdma_req { | |||
300 | struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; | 289 | struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; |
301 | struct rpcrdma_regbuf *rl_rdmabuf; | 290 | struct rpcrdma_regbuf *rl_rdmabuf; |
302 | struct rpcrdma_regbuf *rl_sendbuf; | 291 | struct rpcrdma_regbuf *rl_sendbuf; |
303 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | ||
304 | struct rpcrdma_mr_seg *rl_nextseg; | ||
305 | 292 | ||
306 | struct ib_cqe rl_cqe; | 293 | struct ib_cqe rl_cqe; |
307 | struct list_head rl_all; | 294 | struct list_head rl_all; |
308 | bool rl_backchannel; | 295 | bool rl_backchannel; |
296 | |||
297 | struct list_head rl_registered; /* registered segments */ | ||
298 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | ||
309 | }; | 299 | }; |
310 | 300 | ||
311 | static inline struct rpcrdma_req * | 301 | static inline struct rpcrdma_req * |
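rl_registered replaces the per-segment rl_mw/mr_rkey/mr_base bookkeeping removed above: each MW handed back by ->ro_map is linked onto the request, and ro_unmap_sync/ro_unmap_safe simply drain that list. A hedged fragment of the marshaling side, assuming ->ro_map returns the number of segments it consumed or a negative errno.

	/* Sketch, not quoted from rpc_rdma.c */
	struct rpcrdma_mw *mw;
	int n;

	n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, writing, &mw);
	if (n < 0)
		return n;
	/* Track the MW on the request so invalidation can find it */
	list_add(&mw->mw_list, &req->rl_registered);
	seg += n;
	nsegs -= n;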
@@ -341,6 +331,11 @@ struct rpcrdma_buffer { | |||
341 | struct list_head rb_allreqs; | 331 | struct list_head rb_allreqs; |
342 | 332 | ||
343 | u32 rb_bc_max_requests; | 333 | u32 rb_bc_max_requests; |
334 | |||
335 | spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */ | ||
336 | struct list_head rb_stale_mrs; | ||
337 | struct delayed_work rb_recovery_worker; | ||
338 | struct delayed_work rb_refresh_worker; | ||
344 | }; | 339 | }; |
345 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | 340 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) |
346 | 341 | ||
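These fields back the MR recovery mechanism: an MW whose invalidation did not complete is parked on rb_stale_mrs and later handed to ->ro_recover_mr by the delayed work. A hedged sketch of the producer side, rpcrdma_defer_mr_recovery(), which is declared later in this header; the real body lives in verbs.c and may differ in detail.

void
rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
{
	struct rpcrdma_buffer *buf = &mw->mw_xprt->rx_buf;

	/* Park the stale MW and let rb_recovery_worker call ->ro_recover_mr */
	spin_lock(&buf->rb_recovery_lock);
	list_add_tail(&mw->mw_list, &buf->rb_stale_mrs);
	spin_unlock(&buf->rb_recovery_lock);

	schedule_delayed_work(&buf->rb_recovery_worker, 0);
}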
@@ -387,6 +382,9 @@ struct rpcrdma_stats { | |||
387 | unsigned long bad_reply_count; | 382 | unsigned long bad_reply_count; |
388 | unsigned long nomsg_call_count; | 383 | unsigned long nomsg_call_count; |
389 | unsigned long bcall_count; | 384 | unsigned long bcall_count; |
385 | unsigned long mrs_recovered; | ||
386 | unsigned long mrs_orphaned; | ||
387 | unsigned long mrs_allocated; | ||
390 | }; | 388 | }; |
391 | 389 | ||
392 | /* | 390 | /* |
@@ -395,23 +393,25 @@ struct rpcrdma_stats { | |||
395 | struct rpcrdma_xprt; | 393 | struct rpcrdma_xprt; |
396 | struct rpcrdma_memreg_ops { | 394 | struct rpcrdma_memreg_ops { |
397 | int (*ro_map)(struct rpcrdma_xprt *, | 395 | int (*ro_map)(struct rpcrdma_xprt *, |
398 | struct rpcrdma_mr_seg *, int, bool); | 396 | struct rpcrdma_mr_seg *, int, bool, |
397 | struct rpcrdma_mw **); | ||
399 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, | 398 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
400 | struct rpcrdma_req *); | 399 | struct rpcrdma_req *); |
401 | void (*ro_unmap_safe)(struct rpcrdma_xprt *, | 400 | void (*ro_unmap_safe)(struct rpcrdma_xprt *, |
402 | struct rpcrdma_req *, bool); | 401 | struct rpcrdma_req *, bool); |
402 | void (*ro_recover_mr)(struct rpcrdma_mw *); | ||
403 | int (*ro_open)(struct rpcrdma_ia *, | 403 | int (*ro_open)(struct rpcrdma_ia *, |
404 | struct rpcrdma_ep *, | 404 | struct rpcrdma_ep *, |
405 | struct rpcrdma_create_data_internal *); | 405 | struct rpcrdma_create_data_internal *); |
406 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | 406 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
407 | int (*ro_init)(struct rpcrdma_xprt *); | 407 | int (*ro_init_mr)(struct rpcrdma_ia *, |
408 | void (*ro_destroy)(struct rpcrdma_buffer *); | 408 | struct rpcrdma_mw *); |
409 | void (*ro_release_mr)(struct rpcrdma_mw *); | ||
409 | const char *ro_displayname; | 410 | const char *ro_displayname; |
410 | }; | 411 | }; |
411 | 412 | ||
412 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | 413 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; |
413 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | 414 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; |
414 | extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; | ||
415 | 415 | ||
416 | /* | 416 | /* |
417 | * RPCRDMA transport -- encapsulates the structures above for | 417 | * RPCRDMA transport -- encapsulates the structures above for |
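Compared with the old vector, the whole-pool ro_init/ro_destroy hooks give way to per-MR ro_init_mr/ro_release_mr, and ro_recover_mr handles MRs that failed to invalidate. A hedged skeleton of how a registration mode wires this up; the example_* names are placeholders, not the actual fmr_ops.c/frwr_ops.c symbols.

const struct rpcrdma_memreg_ops rpcrdma_example_memreg_ops = {
	.ro_map		= example_op_map,
	.ro_unmap_sync	= example_op_unmap_sync,
	.ro_unmap_safe	= example_op_unmap_safe,
	.ro_recover_mr	= example_op_recover_mr,
	.ro_open	= example_op_open,
	.ro_maxpages	= example_op_maxpages,
	.ro_init_mr	= example_op_init_mr,
	.ro_release_mr	= example_op_release_mr,
	.ro_displayname	= "example",
};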
@@ -446,6 +446,8 @@ extern int xprt_rdma_pad_optimize; | |||
446 | */ | 446 | */ |
447 | int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); | 447 | int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); |
448 | void rpcrdma_ia_close(struct rpcrdma_ia *); | 448 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
449 | bool frwr_is_supported(struct rpcrdma_ia *); | ||
450 | bool fmr_is_supported(struct rpcrdma_ia *); | ||
449 | 451 | ||
450 | /* | 452 | /* |
451 | * Endpoint calls - xprtrdma/verbs.c | 453 | * Endpoint calls - xprtrdma/verbs.c |
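frwr_is_supported() and fmr_is_supported() let the transport probe the device before committing to a registration mode, now that the physical fallback is gone. A hedged sketch of how rpcrdma_ia_open() might use them, assuming the existing memory-registration strategy setting and rpcrdma_memreg constants still steer the choice; the helper name is a placeholder.

static int
example_select_memreg_ops(struct rpcrdma_ia *ia, unsigned int strategy)
{
	switch (strategy) {
	case RPCRDMA_FRMR:
		if (frwr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
			return 0;
		}
		/* fall through: try FMR when FRWR is unavailable */
	case RPCRDMA_MTHCAFMR:
		if (fmr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_fmr_memreg_ops;
			return 0;
		}
	default:
		return -EINVAL;	/* no usable registration mode */
	}
}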
@@ -477,6 +479,8 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); | |||
477 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 479 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
478 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 480 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
479 | 481 | ||
482 | void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); | ||
483 | |||
480 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | 484 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, |
481 | size_t, gfp_t); | 485 | size_t, gfp_t); |
482 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 486 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
@@ -484,9 +488,6 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, | |||
484 | 488 | ||
485 | int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); | 489 | int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); |
486 | 490 | ||
487 | int frwr_alloc_recovery_wq(void); | ||
488 | void frwr_destroy_recovery_wq(void); | ||
489 | |||
490 | int rpcrdma_alloc_wq(void); | 491 | int rpcrdma_alloc_wq(void); |
491 | void rpcrdma_destroy_wq(void); | 492 | void rpcrdma_destroy_wq(void); |
492 | 493 | ||
@@ -494,45 +495,12 @@ void rpcrdma_destroy_wq(void); | |||
494 | * Wrappers for chunk registration, shared by read/write chunk code. | 495 | * Wrappers for chunk registration, shared by read/write chunk code. |
495 | */ | 496 | */ |
496 | 497 | ||
497 | void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); | ||
498 | |||
499 | static inline enum dma_data_direction | 498 | static inline enum dma_data_direction |
500 | rpcrdma_data_dir(bool writing) | 499 | rpcrdma_data_dir(bool writing) |
501 | { | 500 | { |
502 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | 501 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; |
503 | } | 502 | } |
504 | 503 | ||
505 | static inline void | ||
506 | rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, | ||
507 | enum dma_data_direction direction) | ||
508 | { | ||
509 | seg->mr_dir = direction; | ||
510 | seg->mr_dmalen = seg->mr_len; | ||
511 | |||
512 | if (seg->mr_page) | ||
513 | seg->mr_dma = ib_dma_map_page(device, | ||
514 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
515 | seg->mr_dmalen, seg->mr_dir); | ||
516 | else | ||
517 | seg->mr_dma = ib_dma_map_single(device, | ||
518 | seg->mr_offset, | ||
519 | seg->mr_dmalen, seg->mr_dir); | ||
520 | |||
521 | if (ib_dma_mapping_error(device, seg->mr_dma)) | ||
522 | rpcrdma_mapping_error(seg); | ||
523 | } | ||
524 | |||
525 | static inline void | ||
526 | rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) | ||
527 | { | ||
528 | if (seg->mr_page) | ||
529 | ib_dma_unmap_page(device, | ||
530 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
531 | else | ||
532 | ib_dma_unmap_single(device, | ||
533 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
534 | } | ||
535 | |||
536 | /* | 504 | /* |
537 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 505 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
538 | */ | 506 | */ |
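The removed rpcrdma_map_one()/rpcrdma_unmap_one() helpers mapped one segment at a time; with mw_sg/mw_nents/mw_dir now living in the MW, the per-mode ->ro_map builds a scatterlist and maps it with a single ib_dma_map_sg() call. A hedged fragment of that pattern, with variables as they would appear inside a hypothetical ->ro_map; not quoted from fmr_ops.c or frwr_ops.c.

	int i;

	for (i = 0; i < nsegs; i++, seg++) {
		if (seg->mr_page)
			sg_set_page(&mw->mw_sg[i], seg->mr_page,
				    seg->mr_len, offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mw->mw_sg[i], seg->mr_offset, seg->mr_len);
	}
	mw->mw_nents = i;
	mw->mw_dir = rpcrdma_data_dir(writing);

	if (!ib_dma_map_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir))
		return -ENOMEM;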
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 3ad9fab1985f..1fd464764765 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c | |||
@@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, | |||
604 | 604 | ||
605 | link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); | 605 | link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); |
606 | link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); | 606 | link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); |
607 | nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), | 607 | nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], |
608 | TIPC_MAX_LINK_NAME); | 608 | TIPC_MAX_LINK_NAME); |
609 | 609 | ||
610 | return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, | 610 | return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, |
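The TIPC one-liner fixes an argument-type bug rather than changing behaviour: nla_strlcpy() expects the attribute itself and calls nla_data()/nla_len() internally, so passing nla_data() fed it payload bytes as though they were a struct nlattr. For reference, the prototype this call must match is shown below.

/* include/net/netlink.h (unchanged by this series) */
size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);

/* hence the corrected call: */
nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME);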