Diffstat (limited to 'net')
60 files changed, 1294 insertions, 914 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index d8e376a5f0f1..36a1a739ad68 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 static void p9_virtio_remove(struct virtio_device *vdev)
 {
         struct virtio_chan *chan = vdev->priv;
-
-        if (chan->inuse)
-                p9_virtio_close(chan->client);
-        vdev->config->del_vqs(vdev);
+        unsigned long warning_time;
 
         mutex_lock(&virtio_9p_lock);
+
+        /* Remove self from list so we don't get new users. */
         list_del(&chan->chan_list);
+        warning_time = jiffies;
+
+        /* Wait for existing users to close. */
+        while (chan->inuse) {
+                mutex_unlock(&virtio_9p_lock);
+                msleep(250);
+                if (time_after(jiffies, warning_time + 10 * HZ)) {
+                        dev_emerg(&vdev->dev,
+                                  "p9_virtio_remove: waiting for device in use.\n");
+                        warning_time = jiffies;
+                }
+                mutex_lock(&virtio_9p_lock);
+        }
+
         mutex_unlock(&virtio_9p_lock);
+
+        vdev->config->del_vqs(vdev);
+
         sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
         kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
         kfree(chan->tag);
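The unlink-then-poll pattern this hunk introduces can be exercised in plain C. A minimal userspace sketch, with illustrative names standing in for the kernel API (pthread mutex for virtio_9p_lock, a flag for chan->inuse):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>

static pthread_mutex_t chan_lock = PTHREAD_MUTEX_INITIALIZER;
static bool chan_inuse = true;           /* cleared by the last user */

static void wait_for_users(void)
{
    time_t warning_time = time(NULL);

    pthread_mutex_lock(&chan_lock);
    /* ...unlink from the global list here, so no new users appear... */
    while (chan_inuse) {
        pthread_mutex_unlock(&chan_lock);
        usleep(250 * 1000);                      /* msleep(250) */
        if (time(NULL) - warning_time >= 10) {   /* 10 * HZ */
            fprintf(stderr, "remove: waiting for device in use.\n");
            warning_time = time(NULL);
        }
        pthread_mutex_lock(&chan_lock);
    }
    pthread_mutex_unlock(&chan_lock);
}

static void *user_thread(void *arg)
{
    (void)arg;
    sleep(1);
    pthread_mutex_lock(&chan_lock);
    chan_inuse = false;                  /* last user closes */
    pthread_mutex_unlock(&chan_lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, user_thread, NULL);
    wait_for_users();
    pthread_join(t, NULL);
    puts("device removed");
    return 0;
}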
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b087d278c679..1849d96b3c91 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -563,6 +563,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
          */
         del_nbp(p);
 
+        dev_set_mtu(br->dev, br_min_mtu(br));
+
         spin_lock_bh(&br->lock);
         changed_addr = br_stp_recalculate_bridge_id(br);
         spin_unlock_bh(&br->lock);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 769b185fefbd..a6e2da0bc718 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -281,7 +281,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
         int copylen;
 
         ret = -EOPNOTSUPP;
-        if (m->msg_flags&MSG_OOB)
+        if (flags & MSG_OOB)
                 goto read_error;
 
         skb = skb_recv_datagram(sk, flags, 0 , &ret);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 66e08040ced7..32d710eaf1fc 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -259,6 +259,9 @@ int can_send(struct sk_buff *skb, int loop)
                 goto inval_skb;
         }
 
+        skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+        skb_reset_mac_header(skb);
         skb_reset_network_header(skb);
         skb_reset_transport_header(skb);
 
diff --git a/net/compat.c b/net/compat.c
index 94d3d5e97883..f7bd286a8280 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
             __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
             __get_user(kmsg->msg_flags, &umsg->msg_flags))
                 return -EFAULT;
+
+        if (!uaddr)
+                kmsg->msg_namelen = 0;
+
+        if (kmsg->msg_namelen < 0)
+                return -EINVAL;
+
         if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
                 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
         kmsg->msg_control = compat_ptr(tmp3);
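A minimal sketch of the sanitation order this hunk enforces, using a plain int in place of the real compat msghdr field (SS_SIZE stands in for sizeof(struct sockaddr_storage)):

#include <stdio.h>

#define SS_SIZE 128   /* sizeof(struct sockaddr_storage) on Linux */

static int sanitize_namelen(const void *uaddr, int *namelen)
{
    if (!uaddr)
        *namelen = 0;       /* no address buffer: ignore the length */
    if (*namelen < 0)
        return -22;         /* -EINVAL: reject negative lengths */
    if (*namelen > SS_SIZE)
        *namelen = SS_SIZE; /* clamp to what the kernel will copy */
    return 0;
}

int main(void)
{
    int len = -5;

    printf("ret=%d\n", sanitize_namelen((void *)1, &len)); /* -22 */
    len = 4096;
    sanitize_namelen((void *)1, &len);
    printf("clamped=%d\n", len);                           /* 128 */
    return 0;
}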
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25b4b5d23485..ee0608bb3bc0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2166,28 +2166,28 @@ replay:
                 }
         }
         err = rtnl_configure_link(dev, ifm);
-        if (err < 0) {
-                if (ops->newlink) {
-                        LIST_HEAD(list_kill);
-
-                        ops->dellink(dev, &list_kill);
-                        unregister_netdevice_many(&list_kill);
-                } else {
-                        unregister_netdevice(dev);
-                }
-                goto out;
-        }
-
+        if (err < 0)
+                goto out_unregister;
         if (link_net) {
                 err = dev_change_net_namespace(dev, dest_net, ifname);
                 if (err < 0)
-                        unregister_netdevice(dev);
+                        goto out_unregister;
         }
 out:
         if (link_net)
                 put_net(link_net);
         put_net(dest_net);
         return err;
+out_unregister:
+        if (ops->newlink) {
+                LIST_HEAD(list_kill);
+
+                ops->dellink(dev, &list_kill);
+                unregister_netdevice_many(&list_kill);
+        } else {
+                unregister_netdevice(dev);
+        }
+        goto out;
 }
 }
 
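The rewrite above is the common goto-cleanup idiom: one rollback label instead of the same unregister logic duplicated at each failure site. A toy sketch with stand-in functions, not the rtnetlink API:

#include <stdio.h>

static int configure(void) { return -1; }   /* pretend this step fails */
static void unregister(void) { puts("unregister"); }

static int create_link(void)
{
    int err;

    err = configure();
    if (err < 0)
        goto out_unregister;
    /* ...further steps that can also fail jump to the same label... */
    return 0;

out_unregister:
    unregister();          /* one copy of the rollback code */
    return err;
}

int main(void) { return create_link() ? 1 : 0; }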
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f80507823531..8e4ac97c8477 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3733,9 +3733,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                      struct sock *sk, int tstype)
 {
         struct sk_buff *skb;
-        bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+        bool tsonly;
 
-        if (!sk || !skb_may_tx_timestamp(sk, tsonly))
+        if (!sk)
+                return;
+
+        tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+        if (!skb_may_tx_timestamp(sk, tsonly))
                 return;
 
         if (tsonly)
@@ -4173,7 +4177,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
         skb->ignore_df = 0;
         skb_dst_drop(skb);
         skb->mark = 0;
-        skb->sender_cpu = 0;
+        skb_sender_cpu_clear(skb);
         skb_init_secmark(skb);
         secpath_reset(skb);
         nf_reset(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20c91e4..78e89eb7eb70 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1655,6 +1655,10 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+/*
+ * Buffer destructor for skbs that are not used directly in read or write
+ * path, e.g. for error handler skbs. Automatically called from kfree_skb.
+ */
 void sock_efree(struct sk_buff *skb)
 {
         sock_put(skb->sk);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 433424804284..8ce351ffceb1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,8 @@
 static int zero = 0;
 static int one = 1;
 static int ushort_max = USHRT_MAX;
+static int min_sndbuf = SOCK_MIN_SNDBUF;
+static int min_rcvbuf = SOCK_MIN_RCVBUF;
 
 static int net_msg_warn; /* Unused, but still a sysctl */
 
@@ -237,7 +239,7 @@ static struct ctl_table net_core_table[] = {
                 .maxlen = sizeof(int),
                 .mode = 0644,
                 .proc_handler = proc_dointvec_minmax,
-                .extra1 = &one,
+                .extra1 = &min_sndbuf,
         },
         {
                 .procname = "rmem_max",
@@ -245,7 +247,7 @@ static struct ctl_table net_core_table[] = {
                 .maxlen = sizeof(int),
                 .mode = 0644,
                 .proc_handler = proc_dointvec_minmax,
-                .extra1 = &one,
+                .extra1 = &min_rcvbuf,
         },
         {
                 .procname = "wmem_default",
@@ -253,7 +255,7 @@ static struct ctl_table net_core_table[] = {
                 .maxlen = sizeof(int),
                 .mode = 0644,
                 .proc_handler = proc_dointvec_minmax,
-                .extra1 = &one,
+                .extra1 = &min_sndbuf,
         },
         {
                 .procname = "rmem_default",
@@ -261,7 +263,7 @@ static struct ctl_table net_core_table[] = {
                 .maxlen = sizeof(int),
                 .mode = 0644,
                 .proc_handler = proc_dointvec_minmax,
-                .extra1 = &one,
+                .extra1 = &min_rcvbuf,
         },
         {
                 .procname = "dev_weight",
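What these entries now enforce, sketched in userspace: proc_dointvec_minmax rejects writes below extra1, and the floor moves from 1 to the socket minimums. The numeric values below are illustrative, not the real SOCK_MIN_* constants:

#include <stdio.h>

static int write_minmax(int *target, int val, int min)
{
    if (val < min)
        return -22;     /* -EINVAL, the write is refused */
    *target = val;
    return 0;
}

int main(void)
{
    int sysctl_wmem_max = 212992;
    int min_sndbuf = 4608;   /* illustrative stand-in for SOCK_MIN_SNDBUF */

    /* A 1-byte value used to be accepted and broke socket accounting. */
    printf("ret=%d\n", write_minmax(&sysctl_wmem_max, 1, min_sndbuf));
    return 0;
}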
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 14d02ea905b6..3e44b9b0b78e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
                 release_sock(sk);
                 if (reqsk_queue_empty(&icsk->icsk_accept_queue))
                         timeo = schedule_timeout(timeo);
+                sched_annotate_sleep();
                 lock_sock(sk);
                 err = 0;
                 if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 81751f12645f..592aff37366b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler(
         mutex_unlock(&inet_diag_table_mutex);
 }
 
+static size_t inet_sk_attr_size(void)
+{
+        return    nla_total_size(sizeof(struct tcp_info))
+                + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+                + nla_total_size(1) /* INET_DIAG_TOS */
+                + nla_total_size(1) /* INET_DIAG_TCLASS */
+                + nla_total_size(sizeof(struct inet_diag_meminfo))
+                + nla_total_size(sizeof(struct inet_diag_msg))
+                + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+                + nla_total_size(TCP_CA_NAME_MAX)
+                + nla_total_size(sizeof(struct tcpvegas_info))
+                + 64;
+}
+
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                       struct sk_buff *skb, struct inet_diag_req_v2 *req,
                       struct user_namespace *user_ns,
@@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
         if (err)
                 goto out;
 
-        rep = nlmsg_new(sizeof(struct inet_diag_msg) +
-                        sizeof(struct inet_diag_meminfo) +
-                        sizeof(struct tcp_info) + 64, GFP_KERNEL);
+        rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
         if (!rep) {
                 err = -ENOMEM;
                 goto out;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 787b3c294ce6..d9bc28ac5d1b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb)
         if (unlikely(opt->optlen))
                 ip_forward_options(skb);
 
+        skb_sender_cpu_clear(skb);
         return dst_output(skb);
 }
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2c8d98e728c0..145a50c4d566 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag);
 struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 {
         struct iphdr iph;
+        int netoff;
         u32 len;
 
         if (skb->protocol != htons(ETH_P_IP))
                 return skb;
 
-        if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0)
+        netoff = skb_network_offset(skb);
+
+        if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
                 return skb;
 
         if (iph.ihl < 5 || iph.version != 4)
                 return skb;
 
         len = ntohs(iph.tot_len);
-        if (skb->len < len || len < (iph.ihl * 4))
+        if (skb->len < netoff + len || len < (iph.ihl * 4))
                 return skb;
 
         if (ip_is_fragment(&iph)) {
                 skb = skb_share_check(skb, GFP_ATOMIC);
                 if (skb) {
-                        if (!pskb_may_pull(skb, iph.ihl*4))
+                        if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
                                 return skb;
-                        if (pskb_trim_rcsum(skb, len))
+                        if (pskb_trim_rcsum(skb, netoff + len))
                                 return skb;
                         memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
                         if (ip_defrag(skb, user))
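A standalone sketch of why the offset matters: with a link-layer header still in front of the IP header, length checks relative to offset 0 accept packets that the offset-aware checks correctly reject as truncated:

#include <stdio.h>

static int check_len(int skb_len, int netoff, int tot_len, int ihl)
{
    if (ihl < 5)
        return 0;                     /* malformed IP header */
    if (skb_len < netoff + tot_len)   /* was: skb_len < tot_len */
        return 0;
    if (tot_len < ihl * 4)
        return 0;
    return 1;
}

int main(void)
{
    /* 14-byte Ethernet header still present: the old check would have
     * accepted a 52-byte datagram in a 60-byte buffer. */
    printf("%d\n", check_len(60, 14, 52, 5));   /* 0: truncated */
    printf("%d\n", check_len(80, 14, 52, 5));   /* 1: fits */
    return 0;
}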
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 31d8c71986b4..5cd99271d3a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
         kfree_skb(skb);
 }
 
-static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
-                                          const struct sk_buff *skb,
-                                          int ee_origin)
+/* IPv4 supports cmsg on all imcp errors and some timestamps
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+                                       struct sk_buff *skb,
+                                       int ee_origin)
 {
-        struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+        struct in_pktinfo *info;
+
+        if (ee_origin == SO_EE_ORIGIN_ICMP)
+                return true;
 
-        if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
-            (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+        if (ee_origin == SO_EE_ORIGIN_LOCAL)
+                return false;
+
+        /* Support IP_PKTINFO on tstamp packets if requested, to correlate
+         * timestamp with egress dev. Not possible for packets without dev
+         * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+         */
+        if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
             (!skb->dev))
                 return false;
 
+        info = PKTINFO_SKB_CB(skb);
         info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
         info->ipi_ifindex = skb->dev->ifindex;
         return true;
@@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
         serr = SKB_EXT_ERR(skb);
 
-        if (sin && skb->len) {
+        if (sin && serr->port) {
                 sin->sin_family = AF_INET;
                 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
                                                    serr->addr_offset);
@@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
         sin = &errhdr.offender;
         memset(sin, 0, sizeof(*sin));
 
-        if (skb->len &&
-            (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
-             ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
+        if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
                 sin->sin_family = AF_INET;
                 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
                 if (inet_sk(sk)->cmsg_flags)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99e810f84671..cf5e82f39d3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb,
                     &chainname, &comment, &rulenum) != 0)
                         break;
 
-                nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
-                              "TRACE: %s:%s:%s:%u ",
-                              tablename, chainname, comment, rulenum);
+                nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+                             "TRACE: %s:%s:%s:%u ",
+                             tablename, chainname, comment, rulenum);
         }
 #endif
 
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e9f66e1cda50..208d5439e59b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk)
         kgid_t low, high;
         int ret = 0;
 
+        if (sk->sk_family == AF_INET6)
+                sk->sk_ipv6only = 1;
+
         inet_get_ping_group_range_net(net, &low, &high);
         if (gid_lte(low, group) && gid_lte(group, high))
                 return 0;
@@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                 if (addr_len < sizeof(*addr))
                         return -EINVAL;
 
+                if (addr->sin_family != AF_INET &&
+                    !(addr->sin_family == AF_UNSPEC &&
+                      addr->sin_addr.s_addr == htonl(INADDR_ANY)))
+                        return -EAFNOSUPPORT;
+
                 pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
                          sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
@@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                         return -EINVAL;
 
                 if (addr->sin6_family != AF_INET6)
-                        return -EINVAL;
+                        return -EAFNOSUPPORT;
 
                 pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
                          sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
@@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
                 if (msg->msg_namelen < sizeof(*usin))
                         return -EINVAL;
                 if (usin->sin_family != AF_INET)
-                        return -EINVAL;
+                        return -EAFNOSUPPORT;
                 daddr = usin->sin_addr.s_addr;
                 /* no remote port */
         } else {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0fcd928..995a2259bcfc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                                        int large_allowed)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        u32 new_size_goal, size_goal, hlen;
+        u32 new_size_goal, size_goal;
 
         if (!large_allowed || !sk_can_gso(sk))
                 return mss_now;
 
-        /* Maybe we should/could use sk->sk_prot->max_header here ? */
-        hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-               inet_csk(sk)->icsk_ext_hdr_len +
-               tp->tcp_header_len;
-
-        new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+        /* Note : tcp_tso_autosize() will eventually split this later */
+        new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
         new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
 
         /* We try hard to avoid divides here */
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d694088214cd..62856e185a93 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
  */
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
 {
+        /* If credits accumulated at a higher w, apply them gently now. */
+        if (tp->snd_cwnd_cnt >= w) {
+                tp->snd_cwnd_cnt = 0;
+                tp->snd_cwnd++;
+        }
+
         tp->snd_cwnd_cnt += acked;
         if (tp->snd_cwnd_cnt >= w) {
                 u32 delta = tp->snd_cwnd_cnt / w;
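A userspace sketch of the changed tcp_cong_avoid_ai() arithmetic, showing how credits left over from a larger w are now applied as a single increase instead of being converted into a cwnd burst:

#include <stdio.h>

static unsigned int snd_cwnd = 10, snd_cwnd_cnt = 40;

static void cong_avoid_ai(unsigned int w, unsigned int acked)
{
    /* If credits accumulated at a higher w, apply them gently now. */
    if (snd_cwnd_cnt >= w) {
        snd_cwnd_cnt = 0;
        snd_cwnd++;
    }

    snd_cwnd_cnt += acked;
    if (snd_cwnd_cnt >= w) {
        unsigned int delta = snd_cwnd_cnt / w;

        snd_cwnd_cnt -= delta * w;
        snd_cwnd += delta;
    }
}

int main(void)
{
    /* w dropped from 40 to 10 with 40 credits pending: without the
     * guard, cwnd would jump by 4 at once; with it, by 1. */
    cong_avoid_ai(10, 1);
    printf("cwnd=%u cnt=%u\n", snd_cwnd, snd_cwnd_cnt); /* cwnd=11 cnt=1 */
    return 0;
}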
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4b276d1ed980..06d3d665a9fd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -306,8 +306,10 @@ tcp_friendliness:
                 }
         }
 
-        if (ca->cnt == 0)                       /* cannot be zero */
-                ca->cnt = 1;
+        /* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+         * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+         */
+        ca->cnt = max(ca->cnt, 2U);
 }
 
 static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
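The arithmetic behind the new clamp, assuming roughly one ACK per delivered segment: cwnd grows by one segment per ca->cnt ACKs, and one RTT delivers about cwnd ACKs, so cnt >= 2 bounds growth at cwnd/2 per RTT:

#include <stdio.h>

int main(void)
{
    unsigned int cwnd = 100, cnt = 2;
    unsigned int acks = cwnd;               /* ~one RTT's worth of ACKs */
    unsigned int growth = acks / cnt;       /* 50 segments */

    printf("per-RTT factor <= %.2f\n", (double)(cwnd + growth) / cwnd);
    return 0;                               /* prints 1.50 */
}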
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..1db253e36045 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
         } else {
                 /* Socket is locked, keep trying until memory is available. */
                 for (;;) {
-                        skb = alloc_skb_fclone(MAX_TCP_HEADER,
-                                               sk->sk_allocation);
+                        skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
                         if (skb)
                                 break;
                         yield();
                 }
-
-                /* Reserve space for headers and prepare control bits. */
-                skb_reserve(skb, MAX_TCP_HEADER);
                 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
                 tcp_init_nondata_skb(skb, tp->write_seq,
                                      TCPHDR_ACK | TCPHDR_FIN);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d5f6bd9a210a..dab73813cb92 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
                 return err;
 
         IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+        skb->protocol = htons(ETH_P_IP);
 
         return x->outer_mode->output2(x, skb);
 }
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
 int xfrm4_output_finish(struct sk_buff *skb)
 {
         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-        skb->protocol = htons(ETH_P_IP);
 
 #ifdef CONFIG_NETFILTER
         IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c215be70cac0..ace8daca5c83 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,14 +325,34 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
         kfree_skb(skb);
 }
 
-static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+/* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL.
+ *
+ * At one point, excluding local errors was a quick test to identify icmp/icmp6
+ * errors. This is no longer true, but the test remained, so the v6 stack,
+ * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
+                                      struct sock_exterr_skb *serr)
 {
-        int ifindex = skb->dev ? skb->dev->ifindex : -1;
+        if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+            serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6)
+                return true;
+
+        if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
+                return false;
+
+        if (!skb->dev)
+                return false;
 
         if (skb->protocol == htons(ETH_P_IPV6))
-                IP6CB(skb)->iif = ifindex;
+                IP6CB(skb)->iif = skb->dev->ifindex;
         else
-                PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+                PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
+
+        return true;
 }
 
 /*
@@ -369,7 +389,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
         serr = SKB_EXT_ERR(skb);
 
-        if (sin && skb->len) {
+        if (sin && serr->port) {
                 const unsigned char *nh = skb_network_header(skb);
                 sin->sin6_family = AF_INET6;
                 sin->sin6_flowinfo = 0;
@@ -394,14 +414,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
         memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
         sin = &errhdr.offender;
         memset(sin, 0, sizeof(*sin));
-        if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
+
+        if (ip6_datagram_support_cmsg(skb, serr)) {
                 sin->sin6_family = AF_INET6;
-                if (np->rxopt.all) {
-                        if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
-                            serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
-                                ip6_datagram_prepare_pktinfo_errqueue(skb);
+                if (np->rxopt.all)
                         ip6_datagram_recv_common_ctl(sk, msg, skb);
-                }
                 if (skb->protocol == htons(ETH_P_IPV6)) {
                         sin->sin6_addr = ipv6_hdr(skb)->saddr;
                         if (np->rxopt.all)
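A sketch of the decision table ip6_datagram_support_cmsg() implements; the enum values below happen to match the kernel's SO_EE_ORIGIN_* numbering but are used here purely for illustration:

#include <stdbool.h>
#include <stdio.h>

enum { ORIGIN_LOCAL = 1, ORIGIN_ICMP = 2, ORIGIN_ICMP6 = 3,
       ORIGIN_TIMESTAMPING = 4 };

static bool support_cmsg(int origin, bool have_dev)
{
    if (origin == ORIGIN_ICMP || origin == ORIGIN_ICMP6)
        return true;            /* real network errors: always */
    if (origin == ORIGIN_LOCAL)
        return false;           /* local errors: never */
    return have_dev;            /* timestamps need skb->dev for PKTINFO */
}

int main(void)
{
    printf("%d %d %d\n",
           support_cmsg(ORIGIN_ICMP6, false),         /* 1 */
           support_cmsg(ORIGIN_LOCAL, true),          /* 0 */
           support_cmsg(ORIGIN_TIMESTAMPING, false)); /* 0 */
    return 0;
}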
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b4d5e1d97c1b..27ca79682efb 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
                                 goto again;
                         flp6->saddr = saddr;
                 }
+                err = rt->dst.error;
                 goto out;
         }
 again:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0a04a37305d5..7e80b61b51ff 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
+        skb_sender_cpu_clear(skb);
         return dst_output(skb);
 }
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 266a264ec212..ddd94eca19b3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -314,7 +314,7 @@ out:
  * Create tunnel matching given parameters.
  *
  * Return:
- *   created tunnel or NULL
+ *   created tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
@@ -322,7 +322,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
         struct net_device *dev;
         struct ip6_tnl *t;
         char name[IFNAMSIZ];
-        int err;
+        int err = -ENOMEM;
 
         if (p->name[0])
                 strlcpy(name, p->name, IFNAMSIZ);
@@ -348,7 +348,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 failed_free:
         ip6_dev_free(dev);
 failed:
-        return NULL;
+        return ERR_PTR(err);
 }
 
 /**
@@ -362,7 +362,7 @@ failed:
  * tunnel device is created and registered for use.
  *
  * Return:
- *   matching tunnel or NULL
+ *   matching tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_locate(struct net *net,
@@ -380,13 +380,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
                 if (ipv6_addr_equal(local, &t->parms.laddr) &&
                     ipv6_addr_equal(remote, &t->parms.raddr)) {
                         if (create)
-                                return NULL;
+                                return ERR_PTR(-EEXIST);
 
                         return t;
                 }
         }
         if (!create)
-                return NULL;
+                return ERR_PTR(-ENODEV);
         return ip6_tnl_create(net, p);
 }
 
@@ -1420,7 +1420,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                 }
                 ip6_tnl_parm_from_user(&p1, &p);
                 t = ip6_tnl_locate(net, &p1, 0);
-                if (t == NULL)
+                if (IS_ERR(t))
                         t = netdev_priv(dev);
         } else {
                 memset(&p, 0, sizeof(p));
@@ -1445,7 +1445,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                 ip6_tnl_parm_from_user(&p1, &p);
                 t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
                 if (cmd == SIOCCHGTUNNEL) {
-                        if (t != NULL) {
+                        if (!IS_ERR(t)) {
                                 if (t->dev != dev) {
                                         err = -EEXIST;
                                         break;
@@ -1457,14 +1457,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                         else
                                 err = ip6_tnl_update(t, &p1);
                 }
-                if (t) {
+                if (!IS_ERR(t)) {
                         err = 0;
                         ip6_tnl_parm_to_user(&p, &t->parms);
                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                                 err = -EFAULT;
 
-                } else
-                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+                } else {
+                        err = PTR_ERR(t);
+                }
                 break;
         case SIOCDELTUNNEL:
                 err = -EPERM;
@@ -1478,7 +1479,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                 err = -ENOENT;
                 ip6_tnl_parm_from_user(&p1, &p);
                 t = ip6_tnl_locate(net, &p1, 0);
-                if (t == NULL)
+                if (IS_ERR(t))
                         break;
                 err = -EPERM;
                 if (t->dev == ip6n->fb_tnl_dev)
@@ -1672,12 +1673,13 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
                            struct nlattr *tb[], struct nlattr *data[])
 {
         struct net *net = dev_net(dev);
-        struct ip6_tnl *nt;
+        struct ip6_tnl *nt, *t;
 
         nt = netdev_priv(dev);
         ip6_tnl_netlink_parms(data, &nt->parms);
 
-        if (ip6_tnl_locate(net, &nt->parms, 0))
+        t = ip6_tnl_locate(net, &nt->parms, 0);
+        if (!IS_ERR(t))
                 return -EEXIST;
 
         return ip6_tnl_create2(dev);
@@ -1697,8 +1699,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
         ip6_tnl_netlink_parms(data, &p);
 
         t = ip6_tnl_locate(net, &p, 0);
-
-        if (t) {
+        if (!IS_ERR(t)) {
                 if (t->dev != dev)
                         return -EEXIST;
         } else
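The callers above switch from bare NULL checks to the kernel's ERR_PTR convention: a small negative errno is encoded in the pointer value, so lookup failures can carry -EEXIST vs -ENODEV instead of an ambiguous NULL. A self-contained sketch of the idiom with a stand-in lookup:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *locate(int create, int found)
{
    static int tunnel;             /* stand-in for a real object */

    if (found)
        return create ? ERR_PTR(-17 /* EEXIST */) : &tunnel;
    return create ? &tunnel : ERR_PTR(-19 /* ENODEV */);
}

int main(void)
{
    void *t = locate(1, 1);        /* create over an existing tunnel */

    if (IS_ERR(t))
        printf("err=%ld\n", PTR_ERR(t));   /* err=-17 */
    return 0;
}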
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e080fbbbc0e5..bb00c6f2a885 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb,
                     &chainname, &comment, &rulenum) != 0)
                         break;
 
-                nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
-                              "TRACE: %s:%s:%s:%u ",
-                              tablename, chainname, comment, rulenum);
+                nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
+                             "TRACE: %s:%s:%s:%u ",
+                             tablename, chainname, comment, rulenum);
         }
 #endif
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index bd46f736f61d..a2dfff6ff227 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -102,9 +102,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
         if (msg->msg_name) {
                 DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
-                if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
-                    u->sin6_family != AF_INET6) {
+                if (msg->msg_namelen < sizeof(*u))
                         return -EINVAL;
+                if (u->sin6_family != AF_INET6) {
+                        return -EAFNOSUPPORT;
                 }
                 if (sk->sk_bound_dev_if &&
                     sk->sk_bound_dev_if != u->sin6_scope_id) {
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ab889bb16b3c..be2c0ba82c85 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
         fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
         fptr->nexthdr = nexthdr;
         fptr->reserved = 0;
-        if (skb_shinfo(skb)->ip6_frag_id)
-                fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-        else
-                ipv6_select_ident(fptr,
-                                  (struct rt6_info *)skb_dst(skb));
+        if (!skb_shinfo(skb)->ip6_frag_id)
+                ipv6_proxy_select_ident(skb);
+        fptr->identification = skb_shinfo(skb)->ip6_frag_id;
 
         /* Fragment the skb. ipv6 header and the remaining fields of the
          * fragment header are updated in ipv6_gso_segment()
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index ca3f29b98ae5..010f8bd2d577 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -114,6 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
                 return err;
 
         skb->ignore_df = 1;
+        skb->protocol = htons(ETH_P_IPV6);
 
         return x->outer_mode->output2(x, skb);
 }
@@ -122,7 +123,6 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
 int xfrm6_output_finish(struct sk_buff *skb)
 {
         memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-        skb->protocol = htons(ETH_P_IPV6);
 
 #ifdef CONFIG_NETFILTER
         IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 48bf5a06847b..8d2d01b4800a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -200,6 +200,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
                 case IPPROTO_MH:
+                        offset += ipv6_optlen(exthdr);
                         if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
                                 struct ip6_mh *mh;
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3afe36824703..8d53d65bd2ab 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -58,13 +58,24 @@ struct ieee80211_local;
 #define IEEE80211_UNSET_POWER_LEVEL INT_MIN
 
 /*
- * Some APs experience problems when working with U-APSD. Decrease the
- * probability of that happening by using legacy mode for all ACs but VO.
- * The AP that caused us trouble was a Cisco 4410N. It ignores our
- * setting, and always treats non-VO ACs as legacy.
+ * Some APs experience problems when working with U-APSD. Decreasing the
+ * probability of that happening by using legacy mode for all ACs but VO isn't
+ * enough.
+ *
+ * Cisco 4410N originally forced us to enable VO by default only because it
+ * treated non-VO ACs as legacy.
+ *
+ * However some APs (notably Netgear R7000) silently reclassify packets to
+ * different ACs. Since u-APSD ACs require trigger frames for frame retrieval
+ * clients would never see some frames (e.g. ARP responses) or would fetch them
+ * accidentally after a long time.
+ *
+ * It makes little sense to enable u-APSD queues by default because it needs
+ * userspace applications to be aware of it to actually take advantage of the
+ * possible additional powersavings. Implicitly depending on driver autotrigger
+ * frame support doesn't make much sense.
  */
-#define IEEE80211_DEFAULT_UAPSD_QUEUES \
-        IEEE80211_WMM_IE_STA_QOSINFO_AC_VO
+#define IEEE80211_DEFAULT_UAPSD_QUEUES 0
 
 #define IEEE80211_DEFAULT_MAX_SP_LEN \
         IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
@@ -453,6 +464,7 @@ struct ieee80211_if_managed {
         unsigned int flags;
 
         bool csa_waiting_bcn;
+        bool csa_ignored_same_chan;
 
         bool beacon_crc_valid;
         u32 beacon_crc;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 10ac6324c1d0..142f66aece18 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1150,6 +1150,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
                 return;
         }
 
+        if (cfg80211_chandef_identical(&csa_ie.chandef,
+                                       &sdata->vif.bss_conf.chandef)) {
+                if (ifmgd->csa_ignored_same_chan)
+                        return;
+                sdata_info(sdata,
+                           "AP %pM tries to chanswitch to same channel, ignore\n",
+                           ifmgd->associated->bssid);
+                ifmgd->csa_ignored_same_chan = true;
+                return;
+        }
+
         mutex_lock(&local->mtx);
         mutex_lock(&local->chanctx_mtx);
         conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
@@ -1210,6 +1221,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
         sdata->vif.csa_active = true;
         sdata->csa_chandef = csa_ie.chandef;
         sdata->csa_block_tx = csa_ie.mode;
+        ifmgd->csa_ignored_same_chan = false;
 
         if (sdata->csa_block_tx)
                 ieee80211_stop_vif_queues(local, sdata,
@@ -2090,6 +2102,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
         sdata->vif.csa_active = false;
         ifmgd->csa_waiting_bcn = false;
+        ifmgd->csa_ignored_same_chan = false;
         if (sdata->csa_block_tx) {
                 ieee80211_wake_vif_queues(local, sdata,
                                           IEEE80211_QUEUE_STOP_REASON_CSA);
@@ -3204,7 +3217,8 @@ static const u64 care_about_ies =
         (1ULL << WLAN_EID_CHANNEL_SWITCH) |
         (1ULL << WLAN_EID_PWR_CONSTRAINT) |
         (1ULL << WLAN_EID_HT_CAPABILITY) |
-        (1ULL << WLAN_EID_HT_OPERATION);
+        (1ULL << WLAN_EID_HT_OPERATION) |
+        (1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
 
 static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
                                      struct ieee80211_mgmt *mgmt, size_t len,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1101563357ea..944bdc04e913 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2214,6 +2214,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
         hdr = (struct ieee80211_hdr *) skb->data;
         mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
 
+        if (ieee80211_drop_unencrypted(rx, hdr->frame_control))
+                return RX_DROP_MONITOR;
+
         /* frame is in RMC, don't forward */
         if (ieee80211_is_data(hdr->frame_control) &&
             is_multicast_ether_addr(hdr->addr1) &&
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 8428f4a95479..747bdcf72e92 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3178,7 +3178,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
                 wdev_iter = &sdata_iter->wdev;
 
                 if (sdata_iter == sdata ||
-                    rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+                    !ieee80211_sdata_running(sdata_iter) ||
                     local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
                         continue;
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c47ffd7a0a70..d93ceeb3ef04 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -896,6 +896,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
                         IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
                         return;
                 }
+                if (!(flags & IP_VS_CONN_F_TEMPLATE))
+                        kfree(param->pe_data);
         }
 
         if (opt)
@@ -1169,6 +1171,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
                         (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
                         );
 #endif
+        ip_vs_pe_put(param.pe);
         return 0;
         /* Error exit */
 out:
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 0d8448f19dfe..675d12c69e32 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -212,6 +212,30 @@ void nf_log_packet(struct net *net,
 }
 EXPORT_SYMBOL(nf_log_packet);
 
+void nf_log_trace(struct net *net,
+                  u_int8_t pf,
+                  unsigned int hooknum,
+                  const struct sk_buff *skb,
+                  const struct net_device *in,
+                  const struct net_device *out,
+                  const struct nf_loginfo *loginfo, const char *fmt, ...)
+{
+        va_list args;
+        char prefix[NF_LOG_PREFIXLEN];
+        const struct nf_logger *logger;
+
+        rcu_read_lock();
+        logger = rcu_dereference(net->nf.nf_loggers[pf]);
+        if (logger) {
+                va_start(args, fmt);
+                vsnprintf(prefix, sizeof(prefix), fmt, args);
+                va_end(args);
+                logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
+        }
+        rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_trace);
+
 #define S_SIZE (1024 - (sizeof(unsigned int) + 1))
 
 struct nf_log_buf {
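A userspace sketch of the formatting pattern nf_log_trace() uses above: render the varargs prefix once with vsnprintf() into a fixed buffer, then hand the finished string to the logger callback (the kernel does this under rcu_read_lock()):

#include <stdarg.h>
#include <stdio.h>

#define PREFIXLEN 128   /* stand-in for NF_LOG_PREFIXLEN */

static void logfn(const char *prefix) { fputs(prefix, stdout); }

static void log_trace(const char *fmt, ...)
{
    char prefix[PREFIXLEN];
    va_list args;

    va_start(args, fmt);
    vsnprintf(prefix, sizeof(prefix), fmt, args);
    va_end(args);
    logfn(prefix);   /* in the kernel: logger->logfn(...) */
}

int main(void)
{
    log_trace("TRACE: %s:%s:%s:%u \n", "filter", "FORWARD", "rule", 2u);
    return 0;
}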
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 199fd0f27b0e..ac1a9528dbf2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
@@ -227,7 +227,7 @@ nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) | |||
227 | 227 | ||
228 | static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) | 228 | static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) |
229 | { | 229 | { |
230 | rule->genmask = 0; | 230 | rule->genmask &= ~(1 << gencursor_next(net)); |
231 | } | 231 | } |
232 | 232 | ||
233 | static int | 233 | static int |
@@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, | |||
1225 | 1225 | ||
1226 | if (nla[NFTA_CHAIN_POLICY]) { | 1226 | if (nla[NFTA_CHAIN_POLICY]) { |
1227 | if ((chain != NULL && | 1227 | if ((chain != NULL && |
1228 | !(chain->flags & NFT_BASE_CHAIN)) || | 1228 | !(chain->flags & NFT_BASE_CHAIN))) |
1229 | return -EOPNOTSUPP; | ||
1230 | |||
1231 | if (chain == NULL && | ||
1229 | nla[NFTA_CHAIN_HOOK] == NULL) | 1232 | nla[NFTA_CHAIN_HOOK] == NULL) |
1230 | return -EOPNOTSUPP; | 1233 | return -EOPNOTSUPP; |
1231 | 1234 | ||
@@ -1711,9 +1714,12 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, | |||
1711 | } | 1714 | } |
1712 | nla_nest_end(skb, list); | 1715 | nla_nest_end(skb, list); |
1713 | 1716 | ||
1714 | if (rule->ulen && | 1717 | if (rule->udata) { |
1715 | nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) | 1718 | struct nft_userdata *udata = nft_userdata(rule); |
1716 | goto nla_put_failure; | 1719 | if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1, |
1720 | udata->data) < 0) | ||
1721 | goto nla_put_failure; | ||
1722 | } | ||
1717 | 1723 | ||
1718 | nlmsg_end(skb, nlh); | 1724 | nlmsg_end(skb, nlh); |
1719 | return 0; | 1725 | return 0; |
@@ -1896,11 +1902,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1896 | struct nft_table *table; | 1902 | struct nft_table *table; |
1897 | struct nft_chain *chain; | 1903 | struct nft_chain *chain; |
1898 | struct nft_rule *rule, *old_rule = NULL; | 1904 | struct nft_rule *rule, *old_rule = NULL; |
1905 | struct nft_userdata *udata; | ||
1899 | struct nft_trans *trans = NULL; | 1906 | struct nft_trans *trans = NULL; |
1900 | struct nft_expr *expr; | 1907 | struct nft_expr *expr; |
1901 | struct nft_ctx ctx; | 1908 | struct nft_ctx ctx; |
1902 | struct nlattr *tmp; | 1909 | struct nlattr *tmp; |
1903 | unsigned int size, i, n, ulen = 0; | 1910 | unsigned int size, i, n, ulen = 0, usize = 0; |
1904 | int err, rem; | 1911 | int err, rem; |
1905 | bool create; | 1912 | bool create; |
1906 | u64 handle, pos_handle; | 1913 | u64 handle, pos_handle; |
@@ -1968,12 +1975,19 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1968 | n++; | 1975 | n++; |
1969 | } | 1976 | } |
1970 | } | 1977 | } |
1978 | /* Check for overflow of dlen field */ | ||
1979 | err = -EFBIG; | ||
1980 | if (size >= 1 << 12) | ||
1981 | goto err1; | ||
1971 | 1982 | ||
1972 | if (nla[NFTA_RULE_USERDATA]) | 1983 | if (nla[NFTA_RULE_USERDATA]) { |
1973 | ulen = nla_len(nla[NFTA_RULE_USERDATA]); | 1984 | ulen = nla_len(nla[NFTA_RULE_USERDATA]); |
1985 | if (ulen > 0) | ||
1986 | usize = sizeof(struct nft_userdata) + ulen; | ||
1987 | } | ||
1974 | 1988 | ||
1975 | err = -ENOMEM; | 1989 | err = -ENOMEM; |
1976 | rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL); | 1990 | rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL); |
1977 | if (rule == NULL) | 1991 | if (rule == NULL) |
1978 | goto err1; | 1992 | goto err1; |
1979 | 1993 | ||
@@ -1981,10 +1995,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
1981 | 1995 | ||
1982 | rule->handle = handle; | 1996 | rule->handle = handle; |
1983 | rule->dlen = size; | 1997 | rule->dlen = size; |
1984 | rule->ulen = ulen; | 1998 | rule->udata = ulen ? 1 : 0; |
1985 | 1999 | ||
1986 | if (ulen) | 2000 | if (ulen) { |
1987 | nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen); | 2001 | udata = nft_userdata(rule); |
2002 | udata->len = ulen - 1; | ||
2003 | nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen); | ||
2004 | } | ||
1988 | 2005 | ||
1989 | expr = nft_expr_first(rule); | 2006 | expr = nft_expr_first(rule); |
1990 | for (i = 0; i < n; i++) { | 2007 | for (i = 0; i < n; i++) { |
@@ -2031,12 +2048,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, | |||
2031 | 2048 | ||
2032 | err3: | 2049 | err3: |
2033 | list_del_rcu(&rule->list); | 2050 | list_del_rcu(&rule->list); |
2034 | if (trans) { | ||
2035 | list_del_rcu(&nft_trans_rule(trans)->list); | ||
2036 | nft_rule_clear(net, nft_trans_rule(trans)); | ||
2037 | nft_trans_destroy(trans); | ||
2038 | chain->use++; | ||
2039 | } | ||
2040 | err2: | 2051 | err2: |
2041 | nf_tables_rule_destroy(&ctx, rule); | 2052 | nf_tables_rule_destroy(&ctx, rule); |
2042 | err1: | 2053 | err1: |
@@ -3612,12 +3623,11 @@ static int nf_tables_commit(struct sk_buff *skb) | |||
3612 | &te->elem, | 3623 | &te->elem, |
3613 | NFT_MSG_DELSETELEM, 0); | 3624 | NFT_MSG_DELSETELEM, 0); |
3614 | te->set->ops->get(te->set, &te->elem); | 3625 | te->set->ops->get(te->set, &te->elem); |
3615 | te->set->ops->remove(te->set, &te->elem); | ||
3616 | nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); | 3626 | nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); |
3617 | if (te->elem.flags & NFT_SET_MAP) { | 3627 | if (te->set->flags & NFT_SET_MAP && |
3618 | nft_data_uninit(&te->elem.data, | 3628 | !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) |
3619 | te->set->dtype); | 3629 | nft_data_uninit(&te->elem.data, te->set->dtype); |
3620 | } | 3630 | te->set->ops->remove(te->set, &te->elem); |
3621 | nft_trans_destroy(trans); | 3631 | nft_trans_destroy(trans); |
3622 | break; | 3632 | break; |
3623 | } | 3633 | } |
@@ -3658,7 +3668,7 @@ static int nf_tables_abort(struct sk_buff *skb) | |||
3658 | { | 3668 | { |
3659 | struct net *net = sock_net(skb->sk); | 3669 | struct net *net = sock_net(skb->sk); |
3660 | struct nft_trans *trans, *next; | 3670 | struct nft_trans *trans, *next; |
3661 | struct nft_set *set; | 3671 | struct nft_trans_elem *te; |
3662 | 3672 | ||
3663 | list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { | 3673 | list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { |
3664 | switch (trans->msg_type) { | 3674 | switch (trans->msg_type) { |
@@ -3719,9 +3729,13 @@ static int nf_tables_abort(struct sk_buff *skb) | |||
3719 | break; | 3729 | break; |
3720 | case NFT_MSG_NEWSETELEM: | 3730 | case NFT_MSG_NEWSETELEM: |
3721 | nft_trans_elem_set(trans)->nelems--; | 3731 | nft_trans_elem_set(trans)->nelems--; |
3722 | set = nft_trans_elem_set(trans); | 3732 | te = (struct nft_trans_elem *)trans->data; |
3723 | set->ops->get(set, &nft_trans_elem(trans)); | 3733 | te->set->ops->get(te->set, &te->elem); |
3724 | set->ops->remove(set, &nft_trans_elem(trans)); | 3734 | nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); |
3735 | if (te->set->flags & NFT_SET_MAP && | ||
3736 | !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) | ||
3737 | nft_data_uninit(&te->elem.data, te->set->dtype); | ||
3738 | te->set->ops->remove(te->set, &te->elem); | ||
3725 | nft_trans_destroy(trans); | 3739 | nft_trans_destroy(trans); |
3726 | break; | 3740 | break; |
3727 | case NFT_MSG_DELSETELEM: | 3741 | case NFT_MSG_DELSETELEM: |
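After these hunks, the commit and abort paths release set elements in the same order: get a reference, uninit the key, uninit the data only when the set is a map and the element is not an interval-end sentinel, and remove from the set last. A hedged sketch of just that conditional teardown, with invented flag names standing in for NFT_SET_MAP and NFT_SET_ELEM_INTERVAL_END:

#include <stdio.h>

#define SET_MAP              0x1   /* set carries data, not just keys */
#define ELEM_INTERVAL_END    0x2   /* sentinel element, has no data */

struct elem { unsigned int flags; };
struct set  { unsigned int flags; };

/* Release order mirrors the fix above: key material first, data only
 * for map elements that are not interval-end sentinels, then unlink. */
static void elem_release(struct set *s, struct elem *e)
{
        printf("uninit key\n");
        if ((s->flags & SET_MAP) && !(e->flags & ELEM_INTERVAL_END))
                printf("uninit data\n");
        printf("remove from set\n");
}

int main(void)
{
        struct set s = { .flags = SET_MAP };
        struct elem end = { .flags = ELEM_INTERVAL_END }, mid = { 0 };

        elem_release(&s, &mid);  /* key, data, remove */
        elem_release(&s, &end);  /* key, remove (no data) */
        return 0;
}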
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3b90eb2b2c55..2d298dccb6dd 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c | |||
@@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, | |||
94 | { | 94 | { |
95 | struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); | 95 | struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); |
96 | 96 | ||
97 | nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, | 97 | nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, |
98 | pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", | 98 | pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", |
99 | chain->table->name, chain->name, comments[type], | 99 | chain->table->name, chain->name, comments[type], |
100 | rulenum); | 100 | rulenum); |
101 | } | 101 | } |
102 | 102 | ||
103 | unsigned int | 103 | unsigned int |
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a5599fc51a6f..54330fb5efaf 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c | |||
@@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, | |||
77 | if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) | 77 | if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) |
78 | return -EINVAL; | 78 | return -EINVAL; |
79 | 79 | ||
80 | /* Not all fields are initialized so first zero the tuple */ | ||
81 | memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); | ||
82 | |||
80 | tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); | 83 | tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); |
81 | tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); | 84 | tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); |
82 | 85 | ||
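The cthelper fix above is the standard pattern for structs whose fields are only partially filled from parsed attributes: zero the whole object first so untouched fields (and padding) cannot leak stack garbage into later comparisons or hashes. A small sketch with hypothetical field names:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct tuple {
        uint16_t l3num;
        uint8_t  protonum;
        uint8_t  pad[5];        /* fields the parser never touches */
};

static void parse_tuple(struct tuple *t, uint16_t l3, uint8_t l4)
{
        /* Zero first: only some fields are set below, and callers may
         * compare or hash the whole struct, padding included. */
        memset(t, 0, sizeof(*t));
        t->l3num = l3;
        t->protonum = l4;
}

int main(void)
{
        struct tuple a, b;

        parse_tuple(&a, 2, 6);
        parse_tuple(&b, 2, 6);
        /* memcmp is safe only because both were fully zeroed first. */
        printf("equal: %d\n", memcmp(&a, &b, sizeof(a)) == 0);
        return 0;
}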
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 1279cd85663e..65f3e2b6be44 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c | |||
@@ -123,7 +123,7 @@ static void | |||
123 | nft_target_set_tgchk_param(struct xt_tgchk_param *par, | 123 | nft_target_set_tgchk_param(struct xt_tgchk_param *par, |
124 | const struct nft_ctx *ctx, | 124 | const struct nft_ctx *ctx, |
125 | struct xt_target *target, void *info, | 125 | struct xt_target *target, void *info, |
126 | union nft_entry *entry, u8 proto, bool inv) | 126 | union nft_entry *entry, u16 proto, bool inv) |
127 | { | 127 | { |
128 | par->net = ctx->net; | 128 | par->net = ctx->net; |
129 | par->table = ctx->table->name; | 129 | par->table = ctx->table->name; |
@@ -133,11 +133,14 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, | |||
133 | entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; | 133 | entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; |
134 | break; | 134 | break; |
135 | case AF_INET6: | 135 | case AF_INET6: |
136 | if (proto) | ||
137 | entry->e6.ipv6.flags |= IP6T_F_PROTO; | ||
138 | |||
136 | entry->e6.ipv6.proto = proto; | 139 | entry->e6.ipv6.proto = proto; |
137 | entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; | 140 | entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; |
138 | break; | 141 | break; |
139 | case NFPROTO_BRIDGE: | 142 | case NFPROTO_BRIDGE: |
140 | entry->ebt.ethproto = proto; | 143 | entry->ebt.ethproto = (__force __be16)proto; |
141 | entry->ebt.invflags = inv ? EBT_IPROTO : 0; | 144 | entry->ebt.invflags = inv ? EBT_IPROTO : 0; |
142 | break; | 145 | break; |
143 | } | 146 | } |
@@ -171,7 +174,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] | |||
171 | [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, | 174 | [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, |
172 | }; | 175 | }; |
173 | 176 | ||
174 | static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) | 177 | static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) |
175 | { | 178 | { |
176 | struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; | 179 | struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; |
177 | u32 flags; | 180 | u32 flags; |
@@ -203,7 +206,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, | |||
203 | struct xt_target *target = expr->ops->data; | 206 | struct xt_target *target = expr->ops->data; |
204 | struct xt_tgchk_param par; | 207 | struct xt_tgchk_param par; |
205 | size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); | 208 | size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); |
206 | u8 proto = 0; | 209 | u16 proto = 0; |
207 | bool inv = false; | 210 | bool inv = false; |
208 | union nft_entry e = {}; | 211 | union nft_entry e = {}; |
209 | int ret; | 212 | int ret; |
@@ -334,7 +337,7 @@ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { | |||
334 | static void | 337 | static void |
335 | nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, | 338 | nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, |
336 | struct xt_match *match, void *info, | 339 | struct xt_match *match, void *info, |
337 | union nft_entry *entry, u8 proto, bool inv) | 340 | union nft_entry *entry, u16 proto, bool inv) |
338 | { | 341 | { |
339 | par->net = ctx->net; | 342 | par->net = ctx->net; |
340 | par->table = ctx->table->name; | 343 | par->table = ctx->table->name; |
@@ -344,11 +347,14 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, | |||
344 | entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; | 347 | entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; |
345 | break; | 348 | break; |
346 | case AF_INET6: | 349 | case AF_INET6: |
350 | if (proto) | ||
351 | entry->e6.ipv6.flags |= IP6T_F_PROTO; | ||
352 | |||
347 | entry->e6.ipv6.proto = proto; | 353 | entry->e6.ipv6.proto = proto; |
348 | entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; | 354 | entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; |
349 | break; | 355 | break; |
350 | case NFPROTO_BRIDGE: | 356 | case NFPROTO_BRIDGE: |
351 | entry->ebt.ethproto = proto; | 357 | entry->ebt.ethproto = (__force __be16)proto; |
352 | entry->ebt.invflags = inv ? EBT_IPROTO : 0; | 358 | entry->ebt.invflags = inv ? EBT_IPROTO : 0; |
353 | break; | 359 | break; |
354 | } | 360 | } |
@@ -385,7 +391,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, | |||
385 | struct xt_match *match = expr->ops->data; | 391 | struct xt_match *match = expr->ops->data; |
386 | struct xt_mtchk_param par; | 392 | struct xt_mtchk_param par; |
387 | size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); | 393 | size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); |
388 | u8 proto = 0; | 394 | u16 proto = 0; |
389 | bool inv = false; | 395 | bool inv = false; |
390 | union nft_entry e = {}; | 396 | union nft_entry e = {}; |
391 | int ret; | 397 | int ret; |
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c82df0a48fcd..37c15e674884 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c | |||
@@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, | |||
153 | iter->err = err; | 153 | iter->err = err; |
154 | goto out; | 154 | goto out; |
155 | } | 155 | } |
156 | |||
157 | continue; | ||
156 | } | 158 | } |
157 | 159 | ||
158 | if (iter->count < iter->skip) | 160 | if (iter->count < iter->skip) |
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index ef8a926752a9..50e1e5aaf4ce 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c | |||
@@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) | |||
513 | { | 513 | { |
514 | const struct ip6t_ip6 *i = par->entryinfo; | 514 | const struct ip6t_ip6 *i = par->entryinfo; |
515 | 515 | ||
516 | if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) | 516 | if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && |
517 | && !(i->flags & IP6T_INV_PROTO)) | 517 | !(i->invflags & IP6T_INV_PROTO)) |
518 | return 0; | 518 | return 0; |
519 | 519 | ||
520 | pr_info("Can be used only in combination with " | 520 | pr_info("Can be used only in combination with " |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5bf1e968a728..f8db7064d81c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -3123,11 +3123,18 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, | |||
3123 | return 0; | 3123 | return 0; |
3124 | } | 3124 | } |
3125 | 3125 | ||
3126 | static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) | 3126 | static void packet_dev_mclist_delete(struct net_device *dev, |
3127 | struct packet_mclist **mlp) | ||
3127 | { | 3128 | { |
3128 | for ( ; i; i = i->next) { | 3129 | struct packet_mclist *ml; |
3129 | if (i->ifindex == dev->ifindex) | 3130 | |
3130 | packet_dev_mc(dev, i, what); | 3131 | while ((ml = *mlp) != NULL) { |
3132 | if (ml->ifindex == dev->ifindex) { | ||
3133 | packet_dev_mc(dev, ml, -1); | ||
3134 | *mlp = ml->next; | ||
3135 | kfree(ml); | ||
3136 | } else | ||
3137 | mlp = &ml->next; | ||
3131 | } | 3138 | } |
3132 | } | 3139 | } |
3133 | 3140 | ||
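The rewrite above replaces a read-only walk with the classic pointer-to-pointer idiom, which unlinks and frees matching nodes without tracking a separate "previous" pointer: mlp always points at the link that leads to the current node, so removal is a single assignment through it. A runnable userspace version of the same loop:

#include <stdio.h>
#include <stdlib.h>

struct node {
        int ifindex;
        struct node *next;
};

/* Delete every node matching ifindex; *headp is updated in place. */
static void list_delete(struct node **headp, int ifindex)
{
        struct node *ml, **mlp = headp;

        while ((ml = *mlp) != NULL) {
                if (ml->ifindex == ifindex) {
                        *mlp = ml->next;   /* unlink */
                        free(ml);
                } else {
                        mlp = &ml->next;   /* advance */
                }
        }
}

static struct node *push(struct node *head, int ifindex)
{
        struct node *n = malloc(sizeof(*n));

        n->ifindex = ifindex;
        n->next = head;
        return n;
}

int main(void)
{
        struct node *head = push(push(push(NULL, 1), 2), 1);

        list_delete(&head, 1);
        for (struct node *n = head; n; n = n->next)
                printf("left: %d\n", n->ifindex);   /* prints only 2 */
        return 0;
}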
@@ -3204,12 +3211,11 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) | |||
3204 | packet_dev_mc(dev, ml, -1); | 3211 | packet_dev_mc(dev, ml, -1); |
3205 | kfree(ml); | 3212 | kfree(ml); |
3206 | } | 3213 | } |
3207 | rtnl_unlock(); | 3214 | break; |
3208 | return 0; | ||
3209 | } | 3215 | } |
3210 | } | 3216 | } |
3211 | rtnl_unlock(); | 3217 | rtnl_unlock(); |
3212 | return -EADDRNOTAVAIL; | 3218 | return 0; |
3213 | } | 3219 | } |
3214 | 3220 | ||
3215 | static void packet_flush_mclist(struct sock *sk) | 3221 | static void packet_flush_mclist(struct sock *sk) |
@@ -3559,7 +3565,7 @@ static int packet_notifier(struct notifier_block *this, | |||
3559 | switch (msg) { | 3565 | switch (msg) { |
3560 | case NETDEV_UNREGISTER: | 3566 | case NETDEV_UNREGISTER: |
3561 | if (po->mclist) | 3567 | if (po->mclist) |
3562 | packet_dev_mclist(dev, po->mclist, -1); | 3568 | packet_dev_mclist_delete(dev, &po->mclist); |
3563 | /* fallthrough */ | 3569 | /* fallthrough */ |
3564 | 3570 | ||
3565 | case NETDEV_DOWN: | 3571 | case NETDEV_DOWN: |
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index a817705ce2d0..dba8d0864f18 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c | |||
@@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, | |||
88 | int *unpinned); | 88 | int *unpinned); |
89 | static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); | 89 | static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); |
90 | 90 | ||
91 | static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) | 91 | static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst, |
92 | struct rds_iw_device **rds_iwdev, | ||
93 | struct rdma_cm_id **cm_id) | ||
92 | { | 94 | { |
93 | struct rds_iw_device *iwdev; | 95 | struct rds_iw_device *iwdev; |
94 | struct rds_iw_cm_id *i_cm_id; | 96 | struct rds_iw_cm_id *i_cm_id; |
@@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd | |||
112 | src_addr->sin_port, | 114 | src_addr->sin_port, |
113 | dst_addr->sin_addr.s_addr, | 115 | dst_addr->sin_addr.s_addr, |
114 | dst_addr->sin_port, | 116 | dst_addr->sin_port, |
115 | rs->rs_bound_addr, | 117 | src->sin_addr.s_addr, |
116 | rs->rs_bound_port, | 118 | src->sin_port, |
117 | rs->rs_conn_addr, | 119 | dst->sin_addr.s_addr, |
118 | rs->rs_conn_port); | 120 | dst->sin_port); |
119 | #ifdef WORKING_TUPLE_DETECTION | 121 | #ifdef WORKING_TUPLE_DETECTION |
120 | if (src_addr->sin_addr.s_addr == rs->rs_bound_addr && | 122 | if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr && |
121 | src_addr->sin_port == rs->rs_bound_port && | 123 | src_addr->sin_port == src->sin_port && |
122 | dst_addr->sin_addr.s_addr == rs->rs_conn_addr && | 124 | dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr && |
123 | dst_addr->sin_port == rs->rs_conn_port) { | 125 | dst_addr->sin_port == dst->sin_port) { |
124 | #else | 126 | #else |
125 | /* FIXME - needs to compare the local and remote | 127 | /* FIXME - needs to compare the local and remote |
126 | * ipaddr/port tuple, but the ipaddr is the only | 128 | * ipaddr/port tuple, but the ipaddr is the only |
@@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd | |||
128 | * zero'ed. It doesn't appear to be properly populated | 130 | * zero'ed. It doesn't appear to be properly populated |
129 | * during connection setup... | 131 | * during connection setup... |
130 | */ | 132 | */ |
131 | if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) { | 133 | if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) { |
132 | #endif | 134 | #endif |
133 | spin_unlock_irq(&iwdev->spinlock); | 135 | spin_unlock_irq(&iwdev->spinlock); |
134 | *rds_iwdev = iwdev; | 136 | *rds_iwdev = iwdev; |
@@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i | |||
180 | { | 182 | { |
181 | struct sockaddr_in *src_addr, *dst_addr; | 183 | struct sockaddr_in *src_addr, *dst_addr; |
182 | struct rds_iw_device *rds_iwdev_old; | 184 | struct rds_iw_device *rds_iwdev_old; |
183 | struct rds_sock rs; | ||
184 | struct rdma_cm_id *pcm_id; | 185 | struct rdma_cm_id *pcm_id; |
185 | int rc; | 186 | int rc; |
186 | 187 | ||
187 | src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; | 188 | src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; |
188 | dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; | 189 | dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; |
189 | 190 | ||
190 | rs.rs_bound_addr = src_addr->sin_addr.s_addr; | 191 | rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id); |
191 | rs.rs_bound_port = src_addr->sin_port; | ||
192 | rs.rs_conn_addr = dst_addr->sin_addr.s_addr; | ||
193 | rs.rs_conn_port = dst_addr->sin_port; | ||
194 | |||
195 | rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id); | ||
196 | if (rc) | 192 | if (rc) |
197 | rds_iw_remove_cm_id(rds_iwdev, cm_id); | 193 | rds_iw_remove_cm_id(rds_iwdev, cm_id); |
198 | 194 | ||
@@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, | |||
598 | struct rds_iw_device *rds_iwdev; | 594 | struct rds_iw_device *rds_iwdev; |
599 | struct rds_iw_mr *ibmr = NULL; | 595 | struct rds_iw_mr *ibmr = NULL; |
600 | struct rdma_cm_id *cm_id; | 596 | struct rdma_cm_id *cm_id; |
597 | struct sockaddr_in src = { | ||
598 | .sin_addr.s_addr = rs->rs_bound_addr, | ||
599 | .sin_port = rs->rs_bound_port, | ||
600 | }; | ||
601 | struct sockaddr_in dst = { | ||
602 | .sin_addr.s_addr = rs->rs_conn_addr, | ||
603 | .sin_port = rs->rs_conn_port, | ||
604 | }; | ||
601 | int ret; | 605 | int ret; |
602 | 606 | ||
603 | ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id); | 607 | ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id); |
604 | if (ret || !cm_id) { | 608 | if (ret || !cm_id) { |
605 | ret = -ENODEV; | 609 | ret = -ENODEV; |
606 | goto out; | 610 | goto out; |
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 5394b6be46ec..0610efa83d72 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c | |||
@@ -42,7 +42,8 @@ void rxrpc_UDP_error_report(struct sock *sk) | |||
42 | _leave("UDP socket errqueue empty"); | 42 | _leave("UDP socket errqueue empty"); |
43 | return; | 43 | return; |
44 | } | 44 | } |
45 | if (!skb->len) { | 45 | serr = SKB_EXT_ERR(skb); |
46 | if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { | ||
46 | _leave("UDP empty message"); | 47 | _leave("UDP empty message"); |
47 | kfree_skb(skb); | 48 | kfree_skb(skb); |
48 | return; | 49 | return; |
@@ -50,7 +51,6 @@ void rxrpc_UDP_error_report(struct sock *sk) | |||
50 | 51 | ||
51 | rxrpc_new_skb(skb); | 52 | rxrpc_new_skb(skb); |
52 | 53 | ||
53 | serr = SKB_EXT_ERR(skb); | ||
54 | addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); | 54 | addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); |
55 | port = serr->port; | 55 | port = serr->port; |
56 | 56 | ||
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4575485ad1b4..19a560626dc4 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c | |||
@@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
87 | if (!skb) { | 87 | if (!skb) { |
88 | /* nothing remains on the queue */ | 88 | /* nothing remains on the queue */ |
89 | if (copied && | 89 | if (copied && |
90 | (msg->msg_flags & MSG_PEEK || timeo == 0)) | 90 | (flags & MSG_PEEK || timeo == 0)) |
91 | goto out; | 91 | goto out; |
92 | 92 | ||
93 | /* wait for a message to turn up */ | 93 | /* wait for a message to turn up */ |
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 82c5d7fc1988..5f6288fa3f12 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c | |||
@@ -25,21 +25,41 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, | |||
25 | struct tcf_result *res) | 25 | struct tcf_result *res) |
26 | { | 26 | { |
27 | struct tcf_bpf *b = a->priv; | 27 | struct tcf_bpf *b = a->priv; |
28 | int action; | 28 | int action, filter_res; |
29 | int filter_res; | ||
30 | 29 | ||
31 | spin_lock(&b->tcf_lock); | 30 | spin_lock(&b->tcf_lock); |
31 | |||
32 | b->tcf_tm.lastuse = jiffies; | 32 | b->tcf_tm.lastuse = jiffies; |
33 | bstats_update(&b->tcf_bstats, skb); | 33 | bstats_update(&b->tcf_bstats, skb); |
34 | action = b->tcf_action; | ||
35 | 34 | ||
36 | filter_res = BPF_PROG_RUN(b->filter, skb); | 35 | filter_res = BPF_PROG_RUN(b->filter, skb); |
37 | if (filter_res == 0) { | 36 | |
38 | /* Return code 0 from the BPF program | 37 | /* A BPF program may overwrite the default action opcode. |
39 | * is being interpreted as a drop here. | 38 | * Similarly as in cls_bpf, if filter_res == -1 we use the |
40 | */ | 39 | * default action specified from tc. |
41 | action = TC_ACT_SHOT; | 40 | * |
41 | * In case a different well-known TC_ACT opcode has been | ||
42 | * returned, it will overwrite the default one. | ||
43 | * | ||
44 | * For everything else that is unknown, TC_ACT_UNSPEC is | ||
45 | * returned. | ||
46 | */ | ||
47 | switch (filter_res) { | ||
48 | case TC_ACT_PIPE: | ||
49 | case TC_ACT_RECLASSIFY: | ||
50 | case TC_ACT_OK: | ||
51 | action = filter_res; | ||
52 | break; | ||
53 | case TC_ACT_SHOT: | ||
54 | action = filter_res; | ||
42 | b->tcf_qstats.drops++; | 55 | b->tcf_qstats.drops++; |
56 | break; | ||
57 | case TC_ACT_UNSPEC: | ||
58 | action = b->tcf_action; | ||
59 | break; | ||
60 | default: | ||
61 | action = TC_ACT_UNSPEC; | ||
62 | break; | ||
43 | } | 63 | } |
44 | 64 | ||
45 | spin_unlock(&b->tcf_lock); | 65 | spin_unlock(&b->tcf_lock); |
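The new switch treats the BPF return value as a verdict: well-known TC_ACT opcodes pass through (with drop accounting for TC_ACT_SHOT), TC_ACT_UNSPEC falls back to the action configured from tc, and anything unrecognized maps to TC_ACT_UNSPEC. A compact sketch of that mapping, using locally defined stand-ins for the uapi TC_ACT_* values:

#include <stdio.h>

/* Local stand-ins for the uapi TC_ACT_* opcodes. */
enum { TC_ACT_UNSPEC = -1, TC_ACT_OK = 0, TC_ACT_RECLASSIFY = 1,
       TC_ACT_SHOT = 2, TC_ACT_PIPE = 3 };

static int map_verdict(int filter_res, int default_action, int *drops)
{
        switch (filter_res) {
        case TC_ACT_PIPE:
        case TC_ACT_RECLASSIFY:
        case TC_ACT_OK:
                return filter_res;          /* program overrides */
        case TC_ACT_SHOT:
                (*drops)++;                 /* account the drop */
                return filter_res;
        case TC_ACT_UNSPEC:
                return default_action;      /* use tc's default */
        default:
                return TC_ACT_UNSPEC;       /* unknown opcode */
        }
}

int main(void)
{
        int drops = 0;

        printf("shot -> %d\n", map_verdict(TC_ACT_SHOT, TC_ACT_OK, &drops));
        printf("%d drops, unspec -> %d\n", drops,
               map_verdict(TC_ACT_UNSPEC, TC_ACT_OK, &drops));
        return 0;
}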
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 09487afbfd51..95fdf4e40051 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c | |||
@@ -78,8 +78,11 @@ struct tc_u_hnode { | |||
78 | struct tc_u_common *tp_c; | 78 | struct tc_u_common *tp_c; |
79 | int refcnt; | 79 | int refcnt; |
80 | unsigned int divisor; | 80 | unsigned int divisor; |
81 | struct tc_u_knode __rcu *ht[1]; | ||
82 | struct rcu_head rcu; | 81 | struct rcu_head rcu; |
82 | /* The 'ht' field MUST be the last field in structure to allow for | ||
83 | * more entries allocated at end of structure. | ||
84 | */ | ||
85 | struct tc_u_knode __rcu *ht[1]; | ||
83 | }; | 86 | }; |
84 | 87 | ||
85 | struct tc_u_common { | 88 | struct tc_u_common { |
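The cls_u32 fix above is an instance of the general C rule: a trailing one-element (or flexible) array used for over-allocation must be the last member, because the extra entries extend past the declared end of the struct and would overwrite anything placed after it, as the rcu head was here. A minimal sketch using a proper C99 flexible array member:

#include <stdio.h>
#include <stdlib.h>

struct hnode {
        int divisor;
        /* Must stay last: the allocation below extends this array
         * past the end of the struct. A member declared after it
         * would share memory with ht[1..divisor-1]. */
        void *ht[];
};

int main(void)
{
        int divisor = 16;
        struct hnode *h = calloc(1, sizeof(*h) +
                                    divisor * sizeof(h->ht[0]));

        if (!h)
                return 1;
        h->divisor = divisor;
        h->ht[divisor - 1] = &h;  /* valid: inside the allocation */
        printf("allocated %zu bytes\n",
               sizeof(*h) + divisor * sizeof(h->ht[0]));
        free(h);
        return 0;
}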
diff --git a/net/socket.c b/net/socket.c index bbedbfcb42c2..245330ca0015 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, | |||
1702 | 1702 | ||
1703 | if (len > INT_MAX) | 1703 | if (len > INT_MAX) |
1704 | len = INT_MAX; | 1704 | len = INT_MAX; |
1705 | if (unlikely(!access_ok(VERIFY_READ, buff, len))) | ||
1706 | return -EFAULT; | ||
1705 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1707 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1706 | if (!sock) | 1708 | if (!sock) |
1707 | goto out; | 1709 | goto out; |
@@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, | |||
1760 | 1762 | ||
1761 | if (size > INT_MAX) | 1763 | if (size > INT_MAX) |
1762 | size = INT_MAX; | 1764 | size = INT_MAX; |
1765 | if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) | ||
1766 | return -EFAULT; | ||
1763 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1767 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1764 | if (!sock) | 1768 | if (!sock) |
1765 | goto out; | 1769 | goto out; |
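Both sendto and recvfrom now validate the user buffer before looking up the socket, so a bad pointer fails fast with -EFAULT instead of surfacing later in the copy path after references have been taken. The same fail-fast shape in plain userspace C, with a hypothetical validator standing in for access_ok():

#include <stdio.h>
#include <errno.h>
#include <stddef.h>
#include <sys/types.h>

/* Hypothetical stand-in for the kernel's access_ok() check. */
static int buffer_ok(const void *buf, size_t len)
{
        return buf != NULL || len == 0;
}

static ssize_t do_send(int fd, const void *buf, size_t len)
{
        /* Validate cheap preconditions before acquiring resources. */
        if (!buffer_ok(buf, len))
                return -EFAULT;
        /* ... look up fd, take references, copy data ... */
        (void)fd;
        return (ssize_t)len;
}

int main(void)
{
        printf("%zd\n", do_send(3, "hi", 2));     /* 2 */
        printf("%zd\n", do_send(3, NULL, 2));     /* -14 (EFAULT) */
        return 0;
}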
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index da5136fd5694..579f72bbcf4b 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o |
2 | 2 | ||
3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o \ |
4 | fmr_ops.o frwr_ops.o physical_ops.o | ||
4 | 5 | ||
5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o | 6 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o |
6 | 7 | ||
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c new file mode 100644 index 000000000000..a91ba2c8ef1e --- /dev/null +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -0,0 +1,208 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* Lightweight memory registration using Fast Memory Regions (FMR). | ||
7 | * Referred to sometimes as MTHCAFMR mode. | ||
8 | * | ||
9 | * FMR uses synchronous memory registration and deregistration. | ||
10 | * FMR registration is known to be fast, but FMR deregistration | ||
11 | * can take tens of usecs to complete. | ||
12 | */ | ||
13 | |||
14 | #include "xprt_rdma.h" | ||
15 | |||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
18 | #endif | ||
19 | |||
20 | /* Maximum scatter/gather per FMR */ | ||
21 | #define RPCRDMA_MAX_FMR_SGES (64) | ||
22 | |||
23 | static int | ||
24 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
25 | struct rpcrdma_create_data_internal *cdata) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | /* FMR mode conveys up to 64 pages of payload per chunk segment. | ||
31 | */ | ||
32 | static size_t | ||
33 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
34 | { | ||
35 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
36 | rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | fmr_op_init(struct rpcrdma_xprt *r_xprt) | ||
41 | { | ||
42 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
43 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
44 | struct ib_fmr_attr fmr_attr = { | ||
45 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
46 | .max_maps = 1, | ||
47 | .page_shift = PAGE_SHIFT | ||
48 | }; | ||
49 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
50 | struct rpcrdma_mw *r; | ||
51 | int i, rc; | ||
52 | |||
53 | INIT_LIST_HEAD(&buf->rb_mws); | ||
54 | INIT_LIST_HEAD(&buf->rb_all); | ||
55 | |||
56 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
57 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
58 | |||
59 | while (i--) { | ||
60 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
61 | if (!r) | ||
62 | return -ENOMEM; | ||
63 | |||
64 | r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | ||
65 | if (IS_ERR(r->r.fmr)) | ||
66 | goto out_fmr_err; | ||
67 | |||
68 | list_add(&r->mw_list, &buf->rb_mws); | ||
69 | list_add(&r->mw_all, &buf->rb_all); | ||
70 | } | ||
71 | return 0; | ||
72 | |||
73 | out_fmr_err: | ||
74 | rc = PTR_ERR(r->r.fmr); | ||
75 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); | ||
76 | kfree(r); | ||
77 | return rc; | ||
78 | } | ||
79 | |||
80 | /* Use the ib_map_phys_fmr() verb to register a memory region | ||
81 | * for remote access via RDMA READ or RDMA WRITE. | ||
82 | */ | ||
83 | static int | ||
84 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
85 | int nsegs, bool writing) | ||
86 | { | ||
87 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
88 | struct ib_device *device = ia->ri_id->device; | ||
89 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
90 | struct rpcrdma_mr_seg *seg1 = seg; | ||
91 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
92 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
93 | int len, pageoff, i, rc; | ||
94 | |||
95 | pageoff = offset_in_page(seg1->mr_offset); | ||
96 | seg1->mr_offset -= pageoff; /* start of page */ | ||
97 | seg1->mr_len += pageoff; | ||
98 | len = -pageoff; | ||
99 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | ||
100 | nsegs = RPCRDMA_MAX_FMR_SGES; | ||
101 | for (i = 0; i < nsegs;) { | ||
102 | rpcrdma_map_one(device, seg, direction); | ||
103 | physaddrs[i] = seg->mr_dma; | ||
104 | len += seg->mr_len; | ||
105 | ++seg; | ||
106 | ++i; | ||
107 | /* Check for holes */ | ||
108 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
109 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
110 | break; | ||
111 | } | ||
112 | |||
113 | rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
114 | if (rc) | ||
115 | goto out_maperr; | ||
116 | |||
117 | seg1->mr_rkey = mw->r.fmr->rkey; | ||
118 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
119 | seg1->mr_nsegs = i; | ||
120 | seg1->mr_len = len; | ||
121 | return i; | ||
122 | |||
123 | out_maperr: | ||
124 | dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | ||
125 | __func__, len, (unsigned long long)seg1->mr_dma, | ||
126 | pageoff, i, rc); | ||
127 | while (i--) | ||
128 | rpcrdma_unmap_one(device, --seg); | ||
129 | return rc; | ||
130 | } | ||
131 | |||
132 | /* Use the ib_unmap_fmr() verb to prevent further remote | ||
133 | * access via RDMA READ or RDMA WRITE. | ||
134 | */ | ||
135 | static int | ||
136 | fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
137 | { | ||
138 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
139 | struct rpcrdma_mr_seg *seg1 = seg; | ||
140 | struct ib_device *device; | ||
141 | int rc, nsegs = seg->mr_nsegs; | ||
142 | LIST_HEAD(l); | ||
143 | |||
144 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
145 | rc = ib_unmap_fmr(&l); | ||
146 | read_lock(&ia->ri_qplock); | ||
147 | device = ia->ri_id->device; | ||
148 | while (seg1->mr_nsegs--) | ||
149 | rpcrdma_unmap_one(device, seg++); | ||
150 | read_unlock(&ia->ri_qplock); | ||
151 | if (rc) | ||
152 | goto out_err; | ||
153 | return nsegs; | ||
154 | |||
155 | out_err: | ||
156 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); | ||
157 | return nsegs; | ||
158 | } | ||
159 | |||
160 | /* After a disconnect, unmap all FMRs. | ||
161 | * | ||
162 | * This is invoked only in the transport connect worker in order | ||
163 | * to serialize with rpcrdma_register_fmr_external(). | ||
164 | */ | ||
165 | static void | ||
166 | fmr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
167 | { | ||
168 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
169 | struct rpcrdma_mw *r; | ||
170 | LIST_HEAD(list); | ||
171 | int rc; | ||
172 | |||
173 | list_for_each_entry(r, &buf->rb_all, mw_all) | ||
174 | list_add(&r->r.fmr->list, &list); | ||
175 | |||
176 | rc = ib_unmap_fmr(&list); | ||
177 | if (rc) | ||
178 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
179 | __func__, rc); | ||
180 | } | ||
181 | |||
182 | static void | ||
183 | fmr_op_destroy(struct rpcrdma_buffer *buf) | ||
184 | { | ||
185 | struct rpcrdma_mw *r; | ||
186 | int rc; | ||
187 | |||
188 | while (!list_empty(&buf->rb_all)) { | ||
189 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
190 | list_del(&r->mw_all); | ||
191 | rc = ib_dealloc_fmr(r->r.fmr); | ||
192 | if (rc) | ||
193 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
194 | __func__, rc); | ||
195 | kfree(r); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | ||
200 | .ro_map = fmr_op_map, | ||
201 | .ro_unmap = fmr_op_unmap, | ||
202 | .ro_open = fmr_op_open, | ||
203 | .ro_maxpages = fmr_op_maxpages, | ||
204 | .ro_init = fmr_op_init, | ||
205 | .ro_reset = fmr_op_reset, | ||
206 | .ro_destroy = fmr_op_destroy, | ||
207 | .ro_displayname = "fmr", | ||
208 | }; | ||
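fmr_ops.c is the first of three new files that factor memory registration behind a small ops vtable; the transport calls through the ro_* function pointers instead of branching on the registration strategy at every site. A generic userspace sketch of the same pattern, with invented names and trivialized operations:

#include <stdio.h>

struct memreg_ops {
        int  (*map)(int nsegs);
        void (*reset)(void);
        const char *displayname;
};

static int fmr_map(int nsegs)  { return nsegs > 64 ? 64 : nsegs; }
static void fmr_reset(void)    { puts("fmr: unmap all"); }

static const struct memreg_ops fmr_ops = {
        .map = fmr_map,
        .reset = fmr_reset,
        .displayname = "fmr",
};

/* Callers keep one ops pointer and never branch on the strategy
 * again; swapping in frwr or physical ops needs no caller changes. */
int main(void)
{
        const struct memreg_ops *ops = &fmr_ops;

        printf("%s mapped %d segs\n", ops->displayname, ops->map(100));
        ops->reset();
        return 0;
}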
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c new file mode 100644 index 000000000000..0a7b9df70133 --- /dev/null +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* Lightweight memory registration using Fast Registration Work | ||
7 | * Requests (FRWR). Also referred to sometimes as FRMR mode. | ||
8 | * | ||
9 | * FRWR features ordered asynchronous registration and deregistration | ||
10 | * of arbitrarily sized memory regions. This is the fastest and safest | ||
11 | * but most complex memory registration mode. | ||
12 | */ | ||
13 | |||
14 | #include "xprt_rdma.h" | ||
15 | |||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
18 | #endif | ||
19 | |||
20 | static int | ||
21 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | ||
22 | unsigned int depth) | ||
23 | { | ||
24 | struct rpcrdma_frmr *f = &r->r.frmr; | ||
25 | int rc; | ||
26 | |||
27 | f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); | ||
28 | if (IS_ERR(f->fr_mr)) | ||
29 | goto out_mr_err; | ||
30 | f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); | ||
31 | if (IS_ERR(f->fr_pgl)) | ||
32 | goto out_list_err; | ||
33 | return 0; | ||
34 | |||
35 | out_mr_err: | ||
36 | rc = PTR_ERR(f->fr_mr); | ||
37 | dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", | ||
38 | __func__, rc); | ||
39 | return rc; | ||
40 | |||
41 | out_list_err: | ||
42 | rc = PTR_ERR(f->fr_pgl); | ||
43 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", | ||
44 | __func__, rc); | ||
45 | ib_dereg_mr(f->fr_mr); | ||
46 | return rc; | ||
47 | } | ||
48 | |||
49 | static void | ||
50 | __frwr_release(struct rpcrdma_mw *r) | ||
51 | { | ||
52 | int rc; | ||
53 | |||
54 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
55 | if (rc) | ||
56 | dprintk("RPC: %s: ib_dereg_mr status %i\n", | ||
57 | __func__, rc); | ||
58 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
59 | } | ||
60 | |||
61 | static int | ||
62 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
63 | struct rpcrdma_create_data_internal *cdata) | ||
64 | { | ||
65 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
66 | int depth, delta; | ||
67 | |||
68 | ia->ri_max_frmr_depth = | ||
69 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
70 | devattr->max_fast_reg_page_list_len); | ||
71 | dprintk("RPC: %s: device's max FR page list len = %u\n", | ||
72 | __func__, ia->ri_max_frmr_depth); | ||
73 | |||
74 | /* Add room for frmr register and invalidate WRs. | ||
75 | * 1. FRMR reg WR for head | ||
76 | * 2. FRMR invalidate WR for head | ||
77 | * 3. N FRMR reg WRs for pagelist | ||
78 | * 4. N FRMR invalidate WRs for pagelist | ||
79 | * 5. FRMR reg WR for tail | ||
80 | * 6. FRMR invalidate WR for tail | ||
81 | * 7. The RDMA_SEND WR | ||
82 | */ | ||
83 | depth = 7; | ||
84 | |||
85 | /* Calculate N if the device max FRMR depth is smaller than | ||
86 | * RPCRDMA_MAX_DATA_SEGS. | ||
87 | */ | ||
88 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
89 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; | ||
90 | do { | ||
91 | depth += 2; /* FRMR reg + invalidate */ | ||
92 | delta -= ia->ri_max_frmr_depth; | ||
93 | } while (delta > 0); | ||
94 | } | ||
95 | |||
96 | ep->rep_attr.cap.max_send_wr *= depth; | ||
97 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
98 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
99 | if (!cdata->max_requests) | ||
100 | return -EINVAL; | ||
101 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
102 | depth; | ||
103 | } | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | /* FRWR mode conveys a list of pages per chunk segment. The | ||
109 | * maximum length of that list is the FRWR page list depth. | ||
110 | */ | ||
111 | static size_t | ||
112 | frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
113 | { | ||
114 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
115 | |||
116 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
117 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); | ||
118 | } | ||
119 | |||
120 | /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ | ||
121 | static void | ||
122 | frwr_sendcompletion(struct ib_wc *wc) | ||
123 | { | ||
124 | struct rpcrdma_mw *r; | ||
125 | |||
126 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
127 | return; | ||
128 | |||
129 | /* WARNING: Only wr_id and status are reliable at this point */ | ||
130 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
131 | dprintk("RPC: %s: frmr %p (stale), status %d\n", | ||
132 | __func__, r, wc->status); | ||
133 | r->r.frmr.fr_state = FRMR_IS_STALE; | ||
134 | } | ||
135 | |||
136 | static int | ||
137 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | ||
138 | { | ||
139 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
140 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
141 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
142 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
143 | int i; | ||
144 | |||
145 | INIT_LIST_HEAD(&buf->rb_mws); | ||
146 | INIT_LIST_HEAD(&buf->rb_all); | ||
147 | |||
148 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
149 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
150 | |||
151 | while (i--) { | ||
152 | struct rpcrdma_mw *r; | ||
153 | int rc; | ||
154 | |||
155 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
156 | if (!r) | ||
157 | return -ENOMEM; | ||
158 | |||
159 | rc = __frwr_init(r, pd, device, depth); | ||
160 | if (rc) { | ||
161 | kfree(r); | ||
162 | return rc; | ||
163 | } | ||
164 | |||
165 | list_add(&r->mw_list, &buf->rb_mws); | ||
166 | list_add(&r->mw_all, &buf->rb_all); | ||
167 | r->mw_sendcompletion = frwr_sendcompletion; | ||
168 | } | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | /* Post a FAST_REG Work Request to register a memory region | ||
174 | * for remote access via RDMA READ or RDMA WRITE. | ||
175 | */ | ||
176 | static int | ||
177 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
178 | int nsegs, bool writing) | ||
179 | { | ||
180 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
181 | struct ib_device *device = ia->ri_id->device; | ||
182 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
183 | struct rpcrdma_mr_seg *seg1 = seg; | ||
184 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
185 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
186 | struct ib_mr *mr = frmr->fr_mr; | ||
187 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
188 | u8 key; | ||
189 | int len, pageoff; | ||
190 | int i, rc; | ||
191 | int seg_len; | ||
192 | u64 pa; | ||
193 | int page_no; | ||
194 | |||
195 | pageoff = offset_in_page(seg1->mr_offset); | ||
196 | seg1->mr_offset -= pageoff; /* start of page */ | ||
197 | seg1->mr_len += pageoff; | ||
198 | len = -pageoff; | ||
199 | if (nsegs > ia->ri_max_frmr_depth) | ||
200 | nsegs = ia->ri_max_frmr_depth; | ||
201 | for (page_no = i = 0; i < nsegs;) { | ||
202 | rpcrdma_map_one(device, seg, direction); | ||
203 | pa = seg->mr_dma; | ||
204 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
205 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
206 | pa += PAGE_SIZE; | ||
207 | } | ||
208 | len += seg->mr_len; | ||
209 | ++seg; | ||
210 | ++i; | ||
211 | /* Check for holes */ | ||
212 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
213 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
214 | break; | ||
215 | } | ||
216 | dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", | ||
217 | __func__, mw, i, len); | ||
218 | |||
219 | frmr->fr_state = FRMR_IS_VALID; | ||
220 | |||
221 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
222 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
223 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
224 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; | ||
225 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
226 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
227 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
228 | fastreg_wr.wr.fast_reg.length = len; | ||
229 | fastreg_wr.wr.fast_reg.access_flags = writing ? | ||
230 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
231 | IB_ACCESS_REMOTE_READ; | ||
232 | key = (u8)(mr->rkey & 0x000000FF); | ||
233 | ib_update_fast_reg_key(mr, ++key); | ||
234 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
235 | |||
236 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
237 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
238 | if (rc) | ||
239 | goto out_senderr; | ||
240 | |||
241 | seg1->mr_rkey = mr->rkey; | ||
242 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
243 | seg1->mr_nsegs = i; | ||
244 | seg1->mr_len = len; | ||
245 | return i; | ||
246 | |||
247 | out_senderr: | ||
248 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
249 | ib_update_fast_reg_key(mr, --key); | ||
250 | frmr->fr_state = FRMR_IS_INVALID; | ||
251 | while (i--) | ||
252 | rpcrdma_unmap_one(device, --seg); | ||
253 | return rc; | ||
254 | } | ||
255 | |||
256 | /* Post a LOCAL_INV Work Request to prevent further remote access | ||
257 | * via RDMA READ or RDMA WRITE. | ||
258 | */ | ||
259 | static int | ||
260 | frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
261 | { | ||
262 | struct rpcrdma_mr_seg *seg1 = seg; | ||
263 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
264 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
265 | int rc, nsegs = seg->mr_nsegs; | ||
266 | struct ib_device *device; | ||
267 | |||
268 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
269 | |||
270 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | ||
271 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
272 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
273 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
274 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
275 | |||
276 | read_lock(&ia->ri_qplock); | ||
277 | device = ia->ri_id->device; | ||
278 | while (seg1->mr_nsegs--) | ||
279 | rpcrdma_unmap_one(device, seg++); | ||
280 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
281 | read_unlock(&ia->ri_qplock); | ||
282 | if (rc) | ||
283 | goto out_err; | ||
284 | return nsegs; | ||
285 | |||
286 | out_err: | ||
287 | /* Force rpcrdma_buffer_get() to retry */ | ||
288 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
289 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
290 | return nsegs; | ||
291 | } | ||
292 | |||
293 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
294 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
295 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
296 | * also torn down. | ||
297 | * | ||
298 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
299 | * | ||
300 | * This is invoked only in the transport connect worker in order | ||
301 | * to serialize with rpcrdma_register_frmr_external(). | ||
302 | */ | ||
303 | static void | ||
304 | frwr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
305 | { | ||
306 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
307 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
308 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
309 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
310 | struct rpcrdma_mw *r; | ||
311 | int rc; | ||
312 | |||
313 | list_for_each_entry(r, &buf->rb_all, mw_all) { | ||
314 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
315 | continue; | ||
316 | |||
317 | __frwr_release(r); | ||
318 | rc = __frwr_init(r, pd, device, depth); | ||
319 | if (rc) { | ||
320 | dprintk("RPC: %s: mw %p left %s\n", | ||
321 | __func__, r, | ||
322 | (r->r.frmr.fr_state == FRMR_IS_STALE ? | ||
323 | "stale" : "valid")); | ||
324 | continue; | ||
325 | } | ||
326 | |||
327 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void | ||
332 | frwr_op_destroy(struct rpcrdma_buffer *buf) | ||
333 | { | ||
334 | struct rpcrdma_mw *r; | ||
335 | |||
336 | while (!list_empty(&buf->rb_all)) { | ||
337 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
338 | list_del(&r->mw_all); | ||
339 | __frwr_release(r); | ||
340 | kfree(r); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | ||
345 | .ro_map = frwr_op_map, | ||
346 | .ro_unmap = frwr_op_unmap, | ||
347 | .ro_open = frwr_op_open, | ||
348 | .ro_maxpages = frwr_op_maxpages, | ||
349 | .ro_init = frwr_op_init, | ||
350 | .ro_reset = frwr_op_reset, | ||
351 | .ro_destroy = frwr_op_destroy, | ||
352 | .ro_displayname = "frwr", | ||
353 | }; | ||
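frwr_op_open() above sizes the send queue by multiplying the request count by a per-RPC work-request depth: 7 WRs for the head, tail, pagelist and the RDMA SEND, plus 2 more (register + invalidate) for each extra registration needed when the device's fast-reg page-list limit is smaller than RPCRDMA_MAX_DATA_SEGS. The arithmetic, extracted into a runnable sketch (the constant's value is assumed for illustration):

#include <stdio.h>

#define RPCRDMA_MAX_DATA_SEGS 64   /* assumed value for this sketch */

static int frwr_depth(unsigned int max_frmr_depth)
{
        int depth = 7;  /* head reg/inv, pagelist reg/inv, tail
                         * reg/inv, plus the RDMA SEND itself */
        int delta;

        if (max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
                delta = RPCRDMA_MAX_DATA_SEGS - max_frmr_depth;
                do {
                        depth += 2;     /* extra reg + invalidate */
                        delta -= max_frmr_depth;
                } while (delta > 0);
        }
        return depth;
}

int main(void)
{
        /* A device limited to 16 pages per fast-reg needs more WRs
         * per RPC than one that covers all 64 segments at once. */
        printf("depth(64) = %d\n", frwr_depth(64));  /* 7 */
        printf("depth(16) = %d\n", frwr_depth(16));  /* 13 */
        return 0;
}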
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c new file mode 100644 index 000000000000..ba518af16787 --- /dev/null +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* No-op chunk preparation. All client memory is pre-registered. | ||
7 | * Sometimes referred to as ALLPHYSICAL mode. | ||
8 | * | ||
9 | * Physical registration is simple because all client memory is | ||
10 | * pre-registered and never deregistered. This mode is good for | ||
11 | * adapter bring up, but is considered not safe: the server is | ||
12 | * trusted not to abuse its access to client memory not involved | ||
13 | * in RDMA I/O. | ||
14 | */ | ||
15 | |||
16 | #include "xprt_rdma.h" | ||
17 | |||
18 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
19 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
20 | #endif | ||
21 | |||
22 | static int | ||
23 | physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
24 | struct rpcrdma_create_data_internal *cdata) | ||
25 | { | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | /* PHYSICAL memory registration conveys one page per chunk segment. | ||
30 | */ | ||
31 | static size_t | ||
32 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
33 | { | ||
34 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
35 | rpcrdma_max_segments(r_xprt)); | ||
36 | } | ||
37 | |||
38 | static int | ||
39 | physical_op_init(struct rpcrdma_xprt *r_xprt) | ||
40 | { | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | /* The client's physical memory is already exposed for | ||
45 | * remote access via RDMA READ or RDMA WRITE. | ||
46 | */ | ||
47 | static int | ||
48 | physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
49 | int nsegs, bool writing) | ||
50 | { | ||
51 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
52 | |||
53 | rpcrdma_map_one(ia->ri_id->device, seg, | ||
54 | rpcrdma_data_dir(writing)); | ||
55 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
56 | seg->mr_base = seg->mr_dma; | ||
57 | seg->mr_nsegs = 1; | ||
58 | return 1; | ||
59 | } | ||
60 | |||
61 | /* Unmap a memory region, but leave it registered. | ||
62 | */ | ||
63 | static int | ||
64 | physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
65 | { | ||
66 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
67 | |||
68 | read_lock(&ia->ri_qplock); | ||
69 | rpcrdma_unmap_one(ia->ri_id->device, seg); | ||
70 | read_unlock(&ia->ri_qplock); | ||
71 | |||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | static void | ||
76 | physical_op_reset(struct rpcrdma_xprt *r_xprt) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static void | ||
81 | physical_op_destroy(struct rpcrdma_buffer *buf) | ||
82 | { | ||
83 | } | ||
84 | |||
85 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | ||
86 | .ro_map = physical_op_map, | ||
87 | .ro_unmap = physical_op_unmap, | ||
88 | .ro_open = physical_op_open, | ||
89 | .ro_maxpages = physical_op_maxpages, | ||
90 | .ro_init = physical_op_init, | ||
91 | .ro_reset = physical_op_reset, | ||
92 | .ro_destroy = physical_op_destroy, | ||
93 | .ro_displayname = "physical", | ||
94 | }; | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 91ffde82fa0c..2c53ea9e1b83 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -53,6 +53,14 @@ | |||
53 | # define RPCDBG_FACILITY RPCDBG_TRANS | 53 | # define RPCDBG_FACILITY RPCDBG_TRANS |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | enum rpcrdma_chunktype { | ||
57 | rpcrdma_noch = 0, | ||
58 | rpcrdma_readch, | ||
59 | rpcrdma_areadch, | ||
60 | rpcrdma_writech, | ||
61 | rpcrdma_replych | ||
62 | }; | ||
63 | |||
56 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 64 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
57 | static const char transfertypes[][12] = { | 65 | static const char transfertypes[][12] = { |
58 | "pure inline", /* no chunks */ | 66 | "pure inline", /* no chunks */ |
@@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
179 | struct rpcrdma_write_array *warray = NULL; | 187 | struct rpcrdma_write_array *warray = NULL; |
180 | struct rpcrdma_write_chunk *cur_wchunk = NULL; | 188 | struct rpcrdma_write_chunk *cur_wchunk = NULL; |
181 | __be32 *iptr = headerp->rm_body.rm_chunks; | 189 | __be32 *iptr = headerp->rm_body.rm_chunks; |
190 | int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool); | ||
182 | 191 | ||
183 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { | 192 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { |
184 | /* a read chunk - server will RDMA Read our memory */ | 193 | /* a read chunk - server will RDMA Read our memory */ |
@@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
201 | if (nsegs < 0) | 210 | if (nsegs < 0) |
202 | return nsegs; | 211 | return nsegs; |
203 | 212 | ||
213 | map = r_xprt->rx_ia.ri_ops->ro_map; | ||
204 | do { | 214 | do { |
205 | n = rpcrdma_register_external(seg, nsegs, | 215 | n = map(r_xprt, seg, nsegs, cur_wchunk != NULL); |
206 | cur_wchunk != NULL, r_xprt); | ||
207 | if (n <= 0) | 216 | if (n <= 0) |
208 | goto out; | 217 | goto out; |
209 | if (cur_rchunk) { /* read */ | 218 | if (cur_rchunk) { /* read */ |
@@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
275 | return (unsigned char *)iptr - (unsigned char *)headerp; | 284 | return (unsigned char *)iptr - (unsigned char *)headerp; |
276 | 285 | ||
277 | out: | 286 | out: |
278 | if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) { | 287 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) |
279 | for (pos = 0; nchunks--;) | 288 | return n; |
280 | pos += rpcrdma_deregister_external( | ||
281 | &req->rl_segments[pos], r_xprt); | ||
282 | } | ||
283 | return n; | ||
284 | } | ||
285 | 289 | ||
286 | /* | 290 | for (pos = 0; nchunks--;) |
287 | * Marshal chunks. This routine returns the header length | 291 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
288 | * consumed by marshaling. | 292 | &req->rl_segments[pos]); |
289 | * | 293 | return n; |
290 | * Returns positive RPC/RDMA header size, or negative errno. | ||
291 | */ | ||
292 | |||
293 | ssize_t | ||
294 | rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) | ||
295 | { | ||
296 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
297 | struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); | ||
298 | |||
299 | if (req->rl_rtype != rpcrdma_noch) | ||
300 | result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
301 | headerp, req->rl_rtype); | ||
302 | else if (req->rl_wtype != rpcrdma_noch) | ||
303 | result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
304 | headerp, req->rl_wtype); | ||
305 | return result; | ||
306 | } | 294 | } |
307 | 295 | ||
308 | /* | 296 | /* |
@@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
397 | char *base; | 385 | char *base; |
398 | size_t rpclen, padlen; | 386 | size_t rpclen, padlen; |
399 | ssize_t hdrlen; | 387 | ssize_t hdrlen; |
388 | enum rpcrdma_chunktype rtype, wtype; | ||
400 | struct rpcrdma_msg *headerp; | 389 | struct rpcrdma_msg *headerp; |
401 | 390 | ||
402 | /* | 391 | /* |
@@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
433 | * into pages; otherwise use reply chunks. | 422 | * into pages; otherwise use reply chunks. |
434 | */ | 423 | */ |
435 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) | 424 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) |
436 | req->rl_wtype = rpcrdma_noch; | 425 | wtype = rpcrdma_noch; |
437 | else if (rqst->rq_rcv_buf.page_len == 0) | 426 | else if (rqst->rq_rcv_buf.page_len == 0) |
438 | req->rl_wtype = rpcrdma_replych; | 427 | wtype = rpcrdma_replych; |
439 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 428 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) |
440 | req->rl_wtype = rpcrdma_writech; | 429 | wtype = rpcrdma_writech; |
441 | else | 430 | else |
442 | req->rl_wtype = rpcrdma_replych; | 431 | wtype = rpcrdma_replych; |
443 | 432 | ||
444 | /* | 433 | /* |
445 | * Chunks needed for arguments? | 434 | * Chunks needed for arguments? |
@@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
456 | * TBD check NFSv4 setacl | 445 | * TBD check NFSv4 setacl |
457 | */ | 446 | */ |
458 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | 447 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) |
459 | req->rl_rtype = rpcrdma_noch; | 448 | rtype = rpcrdma_noch; |
460 | else if (rqst->rq_snd_buf.page_len == 0) | 449 | else if (rqst->rq_snd_buf.page_len == 0) |
461 | req->rl_rtype = rpcrdma_areadch; | 450 | rtype = rpcrdma_areadch; |
462 | else | 451 | else |
463 | req->rl_rtype = rpcrdma_readch; | 452 | rtype = rpcrdma_readch; |
464 | 453 | ||
465 | /* The following simplification is not true forever */ | 454 | /* The following simplification is not true forever */ |
466 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) | 455 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
467 | req->rl_wtype = rpcrdma_noch; | 456 | wtype = rpcrdma_noch; |
468 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { | 457 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
469 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | 458 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", |
470 | __func__); | 459 | __func__); |
471 | return -EIO; | 460 | return -EIO; |
@@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
479 | * When padding is in use and applies to the transfer, insert | 468 | * When padding is in use and applies to the transfer, insert |
480 | * it and change the message type. | 469 | * it and change the message type. |
481 | */ | 470 | */ |
482 | if (req->rl_rtype == rpcrdma_noch) { | 471 | if (rtype == rpcrdma_noch) { |
483 | 472 | ||
484 | padlen = rpcrdma_inline_pullup(rqst, | 473 | padlen = rpcrdma_inline_pullup(rqst, |
485 | RPCRDMA_INLINE_PAD_VALUE(rqst)); | 474 | RPCRDMA_INLINE_PAD_VALUE(rqst)); |
@@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
494 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 483 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
495 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 484 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
496 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 485 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
497 | if (req->rl_wtype != rpcrdma_noch) { | 486 | if (wtype != rpcrdma_noch) { |
498 | dprintk("RPC: %s: invalid chunk list\n", | 487 | dprintk("RPC: %s: invalid chunk list\n", |
499 | __func__); | 488 | __func__); |
500 | return -EIO; | 489 | return -EIO; |
@@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
515 | * on receive. Therefore, we request a reply chunk | 504 | * on receive. Therefore, we request a reply chunk |
516 | * for non-writes wherever feasible and efficient. | 505 | * for non-writes wherever feasible and efficient. |
517 | */ | 506 | */ |
518 | if (req->rl_wtype == rpcrdma_noch) | 507 | if (wtype == rpcrdma_noch) |
519 | req->rl_wtype = rpcrdma_replych; | 508 | wtype = rpcrdma_replych; |
520 | } | 509 | } |
521 | } | 510 | } |
522 | 511 | ||
523 | hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen); | 512 | if (rtype != rpcrdma_noch) { |
513 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
514 | headerp, rtype); | ||
515 | wtype = rtype; /* simplify dprintk */ | ||
516 | |||
517 | } else if (wtype != rpcrdma_noch) { | ||
518 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
519 | headerp, wtype); | ||
520 | } | ||
524 | if (hdrlen < 0) | 521 | if (hdrlen < 0) |
525 | return hdrlen; | 522 | return hdrlen; |
526 | 523 | ||
527 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 524 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
528 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 525 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
529 | __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, | 526 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
530 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); | 527 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); |
531 | 528 | ||
532 | /* | 529 | /* |
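
[Editor's note] The hunk above replaces the req->rl_rtype/rl_wtype fields with local rtype/wtype variables and folds chunk building into a single rpcrdma_create_chunks() call keyed on which type is set. A minimal userspace sketch of that decision logic, with hypothetical names standing in for the kernel types:

#include <stdio.h>

/* Illustrative stand-ins; the real enum lives in xprt_rdma.h. */
enum chunktype { noch = 0, readch, writech, replych };

/* Mirror of the marshaling rules: read and write chunk lists are
 * mutually exclusive, and a reply chunk is dropped when a read
 * chunk is present (the "simplification" noted above). */
static int pick_chunks(enum chunktype rtype, enum chunktype wtype)
{
	if (rtype != noch && wtype == replych)
		wtype = noch;
	if (rtype != noch && wtype != noch)
		return -1;	/* cannot marshal multiple chunk lists */
	if (rtype != noch)
		printf("build read chunks from rq_snd_buf\n");
	else if (wtype != noch)
		printf("build %s chunks from rq_rcv_buf\n",
		       wtype == replych ? "reply" : "write");
	else
		printf("inline, no chunks\n");
	return 0;
}

int main(void)
{
	pick_chunks(readch, noch);
	pick_chunks(noch, replych);
	return pick_chunks(readch, writech) ? 1 : 0;
}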
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2e192baa59f3..54f23b1be986 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = { | |||
157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
158 | 158 | ||
159 | static void | 159 | static void |
160 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
161 | { | ||
162 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
163 | char buf[20]; | ||
164 | |||
165 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
166 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
167 | |||
168 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; | ||
169 | } | ||
170 | |||
171 | static void | ||
172 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
173 | { | ||
174 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
175 | char buf[40]; | ||
176 | |||
177 | snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); | ||
178 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
179 | |||
180 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | ||
181 | } | ||
182 | |||
183 | static void | ||
160 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) | 184 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) |
161 | { | 185 | { |
162 | struct sockaddr *sap = (struct sockaddr *) | 186 | struct sockaddr *sap = (struct sockaddr *) |
163 | &rpcx_to_rdmad(xprt).addr; | 187 | &rpcx_to_rdmad(xprt).addr; |
164 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | 188 | char buf[128]; |
165 | char buf[64]; | 189 | |
190 | switch (sap->sa_family) { | ||
191 | case AF_INET: | ||
192 | xprt_rdma_format_addresses4(xprt, sap); | ||
193 | break; | ||
194 | case AF_INET6: | ||
195 | xprt_rdma_format_addresses6(xprt, sap); | ||
196 | break; | ||
197 | default: | ||
198 | pr_err("rpcrdma: Unrecognized address family\n"); | ||
199 | return; | ||
200 | } | ||
166 | 201 | ||
167 | (void)rpc_ntop(sap, buf, sizeof(buf)); | 202 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
168 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); | 203 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); |
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) | |||
170 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); | 205 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); |
171 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | 206 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); |
172 | 207 | ||
173 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; | ||
174 | |||
175 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
176 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
177 | |||
178 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); | 208 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); |
179 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | 209 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); |
180 | 210 | ||
181 | /* netid */ | 211 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
182 | xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; | ||
183 | } | 212 | } |
184 | 213 | ||
185 | static void | 214 | static void |
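
[Editor's note] Splitting the formatter into per-family helpers keeps the switch on sa_family in one place. A hedged userspace analog of the dispatch, using standard socket types; the kernel's %pi6 printk extension is approximated with inet_ntop():

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Illustrative only: format the HEX_ADDR string the way the
 * IPv4 helper above does (network-order address as %08x). */
static void format_hex_addr(const struct sockaddr *sap,
			    char *buf, size_t len)
{
	switch (sap->sa_family) {
	case AF_INET: {
		const struct sockaddr_in *sin =
			(const struct sockaddr_in *)sap;
		snprintf(buf, len, "%08x", ntohl(sin->sin_addr.s_addr));
		break;
	}
	case AF_INET6:
		inet_ntop(AF_INET6,
			  &((const struct sockaddr_in6 *)sap)->sin6_addr,
			  buf, len);
		break;
	default:
		snprintf(buf, len, "unknown family");
	}
}

int main(void)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	char buf[64];

	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
	format_hex_addr((struct sockaddr *)&sin, buf, sizeof(buf));
	printf("%s\n", buf);	/* prints c0000201 */
	return 0;
}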
@@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args) | |||
377 | xprt_rdma_connect_worker); | 406 | xprt_rdma_connect_worker); |
378 | 407 | ||
379 | xprt_rdma_format_addresses(xprt); | 408 | xprt_rdma_format_addresses(xprt); |
380 | xprt->max_payload = rpcrdma_max_payload(new_xprt); | 409 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
410 | if (xprt->max_payload == 0) | ||
411 | goto out4; | ||
412 | xprt->max_payload <<= PAGE_SHIFT; | ||
381 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", | 413 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
382 | __func__, xprt->max_payload); | 414 | __func__, xprt->max_payload); |
383 | 415 | ||
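
[Editor's note] ro_maxpages returns a page count, so the setup code above converts it to bytes and treats zero as a failure. A trivial sketch of that conversion, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

/* Returns payload bytes, or 0 on a bad page count. */
static unsigned long max_payload(unsigned int maxpages)
{
	if (maxpages == 0)
		return 0;	/* caller bails out, as at out4 above */
	return (unsigned long)maxpages << PAGE_SHIFT;
}

int main(void)
{
	printf("%lu\n", max_payload(256));	/* 1048576 */
	return 0;
}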
@@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer) | |||
552 | 584 | ||
553 | for (i = 0; req->rl_nchunks;) { | 585 | for (i = 0; req->rl_nchunks;) { |
554 | --req->rl_nchunks; | 586 | --req->rl_nchunks; |
555 | i += rpcrdma_deregister_external( | 587 | i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
556 | &req->rl_segments[i], r_xprt); | 588 | &req->rl_segments[i]); |
557 | } | 589 | } |
558 | 590 | ||
559 | rpcrdma_buffer_put(req); | 591 | rpcrdma_buffer_put(req); |
@@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
579 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 611 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
580 | int rc = 0; | 612 | int rc = 0; |
581 | 613 | ||
582 | if (req->rl_niovs == 0) | 614 | rc = rpcrdma_marshal_req(rqst); |
583 | rc = rpcrdma_marshal_req(rqst); | ||
584 | else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) | ||
585 | rc = rpcrdma_marshal_chunks(rqst, 0); | ||
586 | if (rc < 0) | 615 | if (rc < 0) |
587 | goto failed_marshal; | 616 | goto failed_marshal; |
588 | 617 | ||
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 124676c13780..4870d272e006 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | #include <linux/sunrpc/addr.h> | ||
53 | #include <asm/bitops.h> | 54 | #include <asm/bitops.h> |
54 | 55 | ||
55 | #include "xprt_rdma.h" | 56 | #include "xprt_rdma.h" |
@@ -62,9 +63,6 @@ | |||
62 | # define RPCDBG_FACILITY RPCDBG_TRANS | 63 | # define RPCDBG_FACILITY RPCDBG_TRANS |
63 | #endif | 64 | #endif |
64 | 65 | ||
65 | static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); | ||
66 | static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); | ||
67 | |||
68 | /* | 66 | /* |
69 | * internal functions | 67 | * internal functions |
70 | */ | 68 | */ |
@@ -188,7 +186,7 @@ static const char * const wc_status[] = { | |||
188 | "remote access error", | 186 | "remote access error", |
189 | "remote operation error", | 187 | "remote operation error", |
190 | "transport retry counter exceeded", | 188 | "transport retry counter exceeded", |
191 | "RNR retrycounter exceeded", | 189 | "RNR retry counter exceeded", |
192 | "local RDD violation error", | 190 | "local RDD violation error", |
193 | "remove invalid RD request", | 191 | "remove invalid RD request", |
194 | "operation aborted", | 192 | "operation aborted", |
@@ -206,21 +204,17 @@ static const char * const wc_status[] = { | |||
206 | static void | 204 | static void |
207 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) | 205 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
208 | { | 206 | { |
209 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
210 | return; | ||
211 | |||
212 | /* WARNING: Only wr_id and status are reliable at this point */ | 207 | /* WARNING: Only wr_id and status are reliable at this point */ |
213 | if (wc->wr_id == 0ULL) { | 208 | if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { |
214 | if (wc->status != IB_WC_WR_FLUSH_ERR) | 209 | if (wc->status != IB_WC_SUCCESS && |
210 | wc->status != IB_WC_WR_FLUSH_ERR) | ||
215 | pr_err("RPC: %s: SEND: %s\n", | 211 | pr_err("RPC: %s: SEND: %s\n", |
216 | __func__, COMPLETION_MSG(wc->status)); | 212 | __func__, COMPLETION_MSG(wc->status)); |
217 | } else { | 213 | } else { |
218 | struct rpcrdma_mw *r; | 214 | struct rpcrdma_mw *r; |
219 | 215 | ||
220 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | 216 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
221 | r->r.frmr.fr_state = FRMR_IS_STALE; | 217 | r->mw_sendcompletion(wc); |
222 | pr_err("RPC: %s: frmr %p (stale): %s\n", | ||
223 | __func__, r, COMPLETION_MSG(wc->status)); | ||
224 | } | 218 | } |
225 | } | 219 | } |
226 | 220 | ||
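
[Editor's note] With RPCRDMA_IGNORE_COMPLETION as the sentinel, the send completion handler either logs unexpected errors or bounces through a per-MW callback carried in wr_id. A small userspace sketch of that cookie-dispatch pattern (names hypothetical):

#include <stdio.h>
#include <stdint.h>

#define IGNORE_COMPLETION 0ULL

struct fake_wc {		/* stand-in for struct ib_wc */
	uint64_t wr_id;
	int status;		/* 0 == success */
};

struct fake_mw {		/* stand-in for struct rpcrdma_mw */
	void (*sendcompletion)(struct fake_wc *);
};

static void mw_done(struct fake_wc *wc)
{
	printf("MW completion, status %d\n", wc->status);
}

static void process_send_wc(struct fake_wc *wc)
{
	if (wc->wr_id == IGNORE_COMPLETION) {
		if (wc->status != 0)
			fprintf(stderr, "SEND error %d\n", wc->status);
		return;
	}
	/* wr_id carries a pointer-sized cookie back to the MW */
	((struct fake_mw *)(uintptr_t)wc->wr_id)->sendcompletion(wc);
}

int main(void)
{
	struct fake_mw mw = { .sendcompletion = mw_done };
	struct fake_wc wc = { .wr_id = (uintptr_t)&mw, .status = 0 };

	process_send_wc(&wc);
	return 0;
}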
@@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
424 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 418 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
425 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 419 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
426 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 420 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
427 | struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; | 421 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; |
428 | #endif | 422 | #endif |
429 | struct ib_qp_attr *attr = &ia->ri_qp_attr; | 423 | struct ib_qp_attr *attr = &ia->ri_qp_attr; |
430 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; | 424 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; |
@@ -480,9 +474,8 @@ connected: | |||
480 | wake_up_all(&ep->rep_connect_wait); | 474 | wake_up_all(&ep->rep_connect_wait); |
481 | /*FALLTHROUGH*/ | 475 | /*FALLTHROUGH*/ |
482 | default: | 476 | default: |
483 | dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", | 477 | dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", |
484 | __func__, &addr->sin_addr.s_addr, | 478 | __func__, sap, rpc_get_port(sap), ep, |
485 | ntohs(addr->sin_port), ep, | ||
486 | CONNECTION_MSG(event->event)); | 479 | CONNECTION_MSG(event->event)); |
487 | break; | 480 | break; |
488 | } | 481 | } |
@@ -491,19 +484,16 @@ connected: | |||
491 | if (connstate == 1) { | 484 | if (connstate == 1) { |
492 | int ird = attr->max_dest_rd_atomic; | 485 | int ird = attr->max_dest_rd_atomic; |
493 | int tird = ep->rep_remote_cma.responder_resources; | 486 | int tird = ep->rep_remote_cma.responder_resources; |
494 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u " | 487 | |
495 | "on %s, memreg %d slots %d ird %d%s\n", | 488 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", |
496 | &addr->sin_addr.s_addr, | 489 | sap, rpc_get_port(sap), |
497 | ntohs(addr->sin_port), | ||
498 | ia->ri_id->device->name, | 490 | ia->ri_id->device->name, |
499 | ia->ri_memreg_strategy, | 491 | ia->ri_ops->ro_displayname, |
500 | xprt->rx_buf.rb_max_requests, | 492 | xprt->rx_buf.rb_max_requests, |
501 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | 493 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); |
502 | } else if (connstate < 0) { | 494 | } else if (connstate < 0) { |
503 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", | 495 | pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", |
504 | &addr->sin_addr.s_addr, | 496 | sap, rpc_get_port(sap), connstate); |
505 | ntohs(addr->sin_port), | ||
506 | connstate); | ||
507 | } | 497 | } |
508 | #endif | 498 | #endif |
509 | 499 | ||
@@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
621 | 611 | ||
622 | if (memreg == RPCRDMA_FRMR) { | 612 | if (memreg == RPCRDMA_FRMR) { |
623 | /* Requires both frmr reg and local dma lkey */ | 613 | /* Requires both frmr reg and local dma lkey */ |
624 | if ((devattr->device_cap_flags & | 614 | if (((devattr->device_cap_flags & |
625 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 615 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
626 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 616 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) || |
617 | (devattr->max_fast_reg_page_list_len == 0)) { | ||
627 | dprintk("RPC: %s: FRMR registration " | 618 | dprintk("RPC: %s: FRMR registration " |
628 | "not supported by HCA\n", __func__); | 619 | "not supported by HCA\n", __func__); |
629 | memreg = RPCRDMA_MTHCAFMR; | 620 | memreg = RPCRDMA_MTHCAFMR; |
630 | } else { | ||
631 | /* Mind the ia limit on FRMR page list depth */ | ||
632 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
633 | RPCRDMA_MAX_DATA_SEGS, | ||
634 | devattr->max_fast_reg_page_list_len); | ||
635 | } | 621 | } |
636 | } | 622 | } |
637 | if (memreg == RPCRDMA_MTHCAFMR) { | 623 | if (memreg == RPCRDMA_MTHCAFMR) { |
@@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
652 | */ | 638 | */ |
653 | switch (memreg) { | 639 | switch (memreg) { |
654 | case RPCRDMA_FRMR: | 640 | case RPCRDMA_FRMR: |
641 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | ||
655 | break; | 642 | break; |
656 | case RPCRDMA_ALLPHYSICAL: | 643 | case RPCRDMA_ALLPHYSICAL: |
644 | ia->ri_ops = &rpcrdma_physical_memreg_ops; | ||
657 | mem_priv = IB_ACCESS_LOCAL_WRITE | | 645 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
658 | IB_ACCESS_REMOTE_WRITE | | 646 | IB_ACCESS_REMOTE_WRITE | |
659 | IB_ACCESS_REMOTE_READ; | 647 | IB_ACCESS_REMOTE_READ; |
660 | goto register_setup; | 648 | goto register_setup; |
661 | case RPCRDMA_MTHCAFMR: | 649 | case RPCRDMA_MTHCAFMR: |
650 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | ||
662 | if (ia->ri_have_dma_lkey) | 651 | if (ia->ri_have_dma_lkey) |
663 | break; | 652 | break; |
664 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 653 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
@@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
678 | rc = -ENOMEM; | 667 | rc = -ENOMEM; |
679 | goto out3; | 668 | goto out3; |
680 | } | 669 | } |
681 | dprintk("RPC: %s: memory registration strategy is %d\n", | 670 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
682 | __func__, memreg); | 671 | __func__, ia->ri_ops->ro_displayname); |
683 | 672 | ||
684 | /* Else will do memory reg/dereg for each chunk */ | 673 | /* Else will do memory reg/dereg for each chunk */ |
685 | ia->ri_memreg_strategy = memreg; | 674 | ia->ri_memreg_strategy = memreg; |
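
[Editor's note] Selecting ri_ops once at open time replaces the later per-strategy switches with indirect calls. A hedged sketch of the vtable selection, with stub ops tables in place of the real registration code:

#include <stdio.h>

enum memreg { FRMR, MTHCAFMR, ALLPHYSICAL };

struct memreg_ops {		/* trimmed-down ri_ops analog */
	const char *displayname;
};

static const struct memreg_ops frwr_ops = { .displayname = "frwr" };
static const struct memreg_ops fmr_ops  = { .displayname = "fmr" };
static const struct memreg_ops phys_ops = { .displayname = "physical" };

static const struct memreg_ops *pick_ops(enum memreg strategy)
{
	switch (strategy) {
	case FRMR:		return &frwr_ops;
	case MTHCAFMR:		return &fmr_ops;
	case ALLPHYSICAL:	return &phys_ops;
	}
	return NULL;
}

int main(void)
{
	printf("memory registration strategy is '%s'\n",
	       pick_ops(FRMR)->displayname);
	return 0;
}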
@@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
743 | 732 | ||
744 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; | 733 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
745 | ep->rep_attr.qp_context = ep; | 734 | ep->rep_attr.qp_context = ep; |
746 | /* send_cq and recv_cq initialized below */ | ||
747 | ep->rep_attr.srq = NULL; | 735 | ep->rep_attr.srq = NULL; |
748 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 736 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
749 | switch (ia->ri_memreg_strategy) { | 737 | rc = ia->ri_ops->ro_open(ia, ep, cdata); |
750 | case RPCRDMA_FRMR: { | 738 | if (rc) |
751 | int depth = 7; | 739 | return rc; |
752 | |||
753 | /* Add room for frmr register and invalidate WRs. | ||
754 | * 1. FRMR reg WR for head | ||
755 | * 2. FRMR invalidate WR for head | ||
756 | * 3. N FRMR reg WRs for pagelist | ||
757 | * 4. N FRMR invalidate WRs for pagelist | ||
758 | * 5. FRMR reg WR for tail | ||
759 | * 6. FRMR invalidate WR for tail | ||
760 | * 7. The RDMA_SEND WR | ||
761 | */ | ||
762 | |||
763 | /* Calculate N if the device max FRMR depth is smaller than | ||
764 | * RPCRDMA_MAX_DATA_SEGS. | ||
765 | */ | ||
766 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
767 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
768 | ia->ri_max_frmr_depth; | ||
769 | |||
770 | do { | ||
771 | depth += 2; /* FRMR reg + invalidate */ | ||
772 | delta -= ia->ri_max_frmr_depth; | ||
773 | } while (delta > 0); | ||
774 | |||
775 | } | ||
776 | ep->rep_attr.cap.max_send_wr *= depth; | ||
777 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
778 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
779 | if (!cdata->max_requests) | ||
780 | return -EINVAL; | ||
781 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
782 | depth; | ||
783 | } | ||
784 | break; | ||
785 | } | ||
786 | default: | ||
787 | break; | ||
788 | } | ||
789 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | 740 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; |
790 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); | 741 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); |
791 | ep->rep_attr.cap.max_recv_sge = 1; | 742 | ep->rep_attr.cap.max_recv_sge = 1; |
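
[Editor's note] The deleted block sized max_send_wr by a per-request WR depth: seven base WRs plus two more (reg + invalidate) for each extra pagelist registration when the device FRMR depth is below RPCRDMA_MAX_DATA_SEGS. A runnable sketch of that arithmetic, with the segment constant assumed:

#include <stdio.h>

#define MAX_DATA_SEGS 64	/* assumption: RPCRDMA_MAX_DATA_SEGS */

/* Mirror of the removed loop: reg + invalidate for head, tail and
 * each pagelist chunk, plus the final RDMA SEND. */
static int wr_depth(int max_frmr_depth)
{
	int depth = 7;

	if (max_frmr_depth < MAX_DATA_SEGS) {
		int delta = MAX_DATA_SEGS - max_frmr_depth;

		do {
			depth += 2;	/* one more FRMR reg + invalidate */
			delta -= max_frmr_depth;
		} while (delta > 0);
	}
	return depth;
}

int main(void)
{
	printf("depth(16) = %d\n", wr_depth(16));	/* 7 + 2*3 = 13 */
	printf("depth(64) = %d\n", wr_depth(64));	/* 7 */
	return 0;
}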
@@ -944,21 +895,9 @@ retry: | |||
944 | rpcrdma_ep_disconnect(ep, ia); | 895 | rpcrdma_ep_disconnect(ep, ia); |
945 | rpcrdma_flush_cqs(ep); | 896 | rpcrdma_flush_cqs(ep); |
946 | 897 | ||
947 | switch (ia->ri_memreg_strategy) { | ||
948 | case RPCRDMA_FRMR: | ||
949 | rpcrdma_reset_frmrs(ia); | ||
950 | break; | ||
951 | case RPCRDMA_MTHCAFMR: | ||
952 | rpcrdma_reset_fmrs(ia); | ||
953 | break; | ||
954 | case RPCRDMA_ALLPHYSICAL: | ||
955 | break; | ||
956 | default: | ||
957 | rc = -EIO; | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 898 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
899 | ia->ri_ops->ro_reset(xprt); | ||
900 | |||
962 | id = rpcrdma_create_id(xprt, ia, | 901 | id = rpcrdma_create_id(xprt, ia, |
963 | (struct sockaddr *)&xprt->rx_data.addr); | 902 | (struct sockaddr *)&xprt->rx_data.addr); |
964 | if (IS_ERR(id)) { | 903 | if (IS_ERR(id)) { |
@@ -1123,91 +1062,6 @@ out: | |||
1123 | return ERR_PTR(rc); | 1062 | return ERR_PTR(rc); |
1124 | } | 1063 | } |
1125 | 1064 | ||
1126 | static int | ||
1127 | rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1128 | { | ||
1129 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
1130 | struct ib_fmr_attr fmr_attr = { | ||
1131 | .max_pages = RPCRDMA_MAX_DATA_SEGS, | ||
1132 | .max_maps = 1, | ||
1133 | .page_shift = PAGE_SHIFT | ||
1134 | }; | ||
1135 | struct rpcrdma_mw *r; | ||
1136 | int i, rc; | ||
1137 | |||
1138 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1139 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
1140 | |||
1141 | while (i--) { | ||
1142 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1143 | if (r == NULL) | ||
1144 | return -ENOMEM; | ||
1145 | |||
1146 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); | ||
1147 | if (IS_ERR(r->r.fmr)) { | ||
1148 | rc = PTR_ERR(r->r.fmr); | ||
1149 | dprintk("RPC: %s: ib_alloc_fmr failed %i\n", | ||
1150 | __func__, rc); | ||
1151 | goto out_free; | ||
1152 | } | ||
1153 | |||
1154 | list_add(&r->mw_list, &buf->rb_mws); | ||
1155 | list_add(&r->mw_all, &buf->rb_all); | ||
1156 | } | ||
1157 | return 0; | ||
1158 | |||
1159 | out_free: | ||
1160 | kfree(r); | ||
1161 | return rc; | ||
1162 | } | ||
1163 | |||
1164 | static int | ||
1165 | rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1166 | { | ||
1167 | struct rpcrdma_frmr *f; | ||
1168 | struct rpcrdma_mw *r; | ||
1169 | int i, rc; | ||
1170 | |||
1171 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1172 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
1173 | |||
1174 | while (i--) { | ||
1175 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1176 | if (r == NULL) | ||
1177 | return -ENOMEM; | ||
1178 | f = &r->r.frmr; | ||
1179 | |||
1180 | f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1181 | ia->ri_max_frmr_depth); | ||
1182 | if (IS_ERR(f->fr_mr)) { | ||
1183 | rc = PTR_ERR(f->fr_mr); | ||
1184 | dprintk("RPC: %s: ib_alloc_fast_reg_mr " | ||
1185 | "failed %i\n", __func__, rc); | ||
1186 | goto out_free; | ||
1187 | } | ||
1188 | |||
1189 | f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1190 | ia->ri_max_frmr_depth); | ||
1191 | if (IS_ERR(f->fr_pgl)) { | ||
1192 | rc = PTR_ERR(f->fr_pgl); | ||
1193 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list " | ||
1194 | "failed %i\n", __func__, rc); | ||
1195 | |||
1196 | ib_dereg_mr(f->fr_mr); | ||
1197 | goto out_free; | ||
1198 | } | ||
1199 | |||
1200 | list_add(&r->mw_list, &buf->rb_mws); | ||
1201 | list_add(&r->mw_all, &buf->rb_all); | ||
1202 | } | ||
1203 | |||
1204 | return 0; | ||
1205 | |||
1206 | out_free: | ||
1207 | kfree(r); | ||
1208 | return rc; | ||
1209 | } | ||
1210 | |||
1211 | int | 1065 | int |
1212 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | 1066 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
1213 | { | 1067 | { |
@@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1244 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; | 1098 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; |
1245 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; | 1099 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; |
1246 | 1100 | ||
1247 | INIT_LIST_HEAD(&buf->rb_mws); | 1101 | rc = ia->ri_ops->ro_init(r_xprt); |
1248 | INIT_LIST_HEAD(&buf->rb_all); | 1102 | if (rc) |
1249 | switch (ia->ri_memreg_strategy) { | 1103 | goto out; |
1250 | case RPCRDMA_FRMR: | ||
1251 | rc = rpcrdma_init_frmrs(ia, buf); | ||
1252 | if (rc) | ||
1253 | goto out; | ||
1254 | break; | ||
1255 | case RPCRDMA_MTHCAFMR: | ||
1256 | rc = rpcrdma_init_fmrs(ia, buf); | ||
1257 | if (rc) | ||
1258 | goto out; | ||
1259 | break; | ||
1260 | default: | ||
1261 | break; | ||
1262 | } | ||
1263 | 1104 | ||
1264 | for (i = 0; i < buf->rb_max_requests; i++) { | 1105 | for (i = 0; i < buf->rb_max_requests; i++) { |
1265 | struct rpcrdma_req *req; | 1106 | struct rpcrdma_req *req; |
@@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
1311 | kfree(req); | 1152 | kfree(req); |
1312 | } | 1153 | } |
1313 | 1154 | ||
1314 | static void | ||
1315 | rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) | ||
1316 | { | ||
1317 | struct rpcrdma_mw *r; | ||
1318 | int rc; | ||
1319 | |||
1320 | while (!list_empty(&buf->rb_all)) { | ||
1321 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1322 | list_del(&r->mw_all); | ||
1323 | list_del(&r->mw_list); | ||
1324 | |||
1325 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1326 | if (rc) | ||
1327 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
1328 | __func__, rc); | ||
1329 | |||
1330 | kfree(r); | ||
1331 | } | ||
1332 | } | ||
1333 | |||
1334 | static void | ||
1335 | rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) | ||
1336 | { | ||
1337 | struct rpcrdma_mw *r; | ||
1338 | int rc; | ||
1339 | |||
1340 | while (!list_empty(&buf->rb_all)) { | ||
1341 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1342 | list_del(&r->mw_all); | ||
1343 | list_del(&r->mw_list); | ||
1344 | |||
1345 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1346 | if (rc) | ||
1347 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1348 | __func__, rc); | ||
1349 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1350 | |||
1351 | kfree(r); | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | void | 1155 | void |
1356 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1156 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
1357 | { | 1157 | { |
@@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1372 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); | 1172 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); |
1373 | } | 1173 | } |
1374 | 1174 | ||
1375 | switch (ia->ri_memreg_strategy) { | 1175 | ia->ri_ops->ro_destroy(buf); |
1376 | case RPCRDMA_FRMR: | ||
1377 | rpcrdma_destroy_frmrs(buf); | ||
1378 | break; | ||
1379 | case RPCRDMA_MTHCAFMR: | ||
1380 | rpcrdma_destroy_fmrs(buf); | ||
1381 | break; | ||
1382 | default: | ||
1383 | break; | ||
1384 | } | ||
1385 | 1176 | ||
1386 | kfree(buf->rb_pool); | 1177 | kfree(buf->rb_pool); |
1387 | } | 1178 | } |
1388 | 1179 | ||
1389 | /* After a disconnect, unmap all FMRs. | ||
1390 | * | ||
1391 | * This is invoked only in the transport connect worker in order | ||
1392 | * to serialize with rpcrdma_register_fmr_external(). | ||
1393 | */ | ||
1394 | static void | ||
1395 | rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) | ||
1396 | { | ||
1397 | struct rpcrdma_xprt *r_xprt = | ||
1398 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1399 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1400 | struct list_head *pos; | ||
1401 | struct rpcrdma_mw *r; | ||
1402 | LIST_HEAD(l); | ||
1403 | int rc; | ||
1404 | |||
1405 | list_for_each(pos, &buf->rb_all) { | ||
1406 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1407 | |||
1408 | INIT_LIST_HEAD(&l); | ||
1409 | list_add(&r->r.fmr->list, &l); | ||
1410 | rc = ib_unmap_fmr(&l); | ||
1411 | if (rc) | ||
1412 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
1413 | __func__, rc); | ||
1414 | } | ||
1415 | } | ||
1416 | |||
1417 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
1418 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
1419 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
1420 | * also torn down. | ||
1421 | * | ||
1422 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
1423 | * | ||
1424 | * This is invoked only in the transport connect worker in order | ||
1425 | * to serialize with rpcrdma_register_frmr_external(). | ||
1426 | */ | ||
1427 | static void | ||
1428 | rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) | ||
1429 | { | ||
1430 | struct rpcrdma_xprt *r_xprt = | ||
1431 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1432 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1433 | struct list_head *pos; | ||
1434 | struct rpcrdma_mw *r; | ||
1435 | int rc; | ||
1436 | |||
1437 | list_for_each(pos, &buf->rb_all) { | ||
1438 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1439 | |||
1440 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
1441 | continue; | ||
1442 | |||
1443 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1444 | if (rc) | ||
1445 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1446 | __func__, rc); | ||
1447 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1448 | |||
1449 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1450 | ia->ri_max_frmr_depth); | ||
1451 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1452 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1453 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1454 | " failed %i\n", __func__, rc); | ||
1455 | continue; | ||
1456 | } | ||
1457 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( | ||
1458 | ia->ri_id->device, | ||
1459 | ia->ri_max_frmr_depth); | ||
1460 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1461 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1462 | dprintk("RPC: %s: " | ||
1463 | "ib_alloc_fast_reg_page_list " | ||
1464 | "failed %i\n", __func__, rc); | ||
1465 | |||
1466 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
1467 | continue; | ||
1468 | } | ||
1469 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1470 | } | ||
1471 | } | ||
1472 | |||
1473 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving | 1180 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving |
1474 | * some req segments uninitialized. | 1181 | * some req segments uninitialized. |
1475 | */ | 1182 | */ |
@@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
1509 | } | 1216 | } |
1510 | } | 1217 | } |
1511 | 1218 | ||
1512 | /* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). | 1219 | /* rpcrdma_unmap_one() was already done during deregistration. |
1513 | * Redo only the ib_post_send(). | 1220 | * Redo only the ib_post_send(). |
1514 | */ | 1221 | */ |
1515 | static void | 1222 | static void |
@@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
1729 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. | 1436 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. |
1730 | */ | 1437 | */ |
1731 | 1438 | ||
1439 | void | ||
1440 | rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) | ||
1441 | { | ||
1442 | dprintk("RPC: map_one: offset %p iova %llx len %zu\n", | ||
1443 | seg->mr_offset, | ||
1444 | (unsigned long long)seg->mr_dma, seg->mr_dmalen); | ||
1445 | } | ||
1446 | |||
1732 | static int | 1447 | static int |
1733 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | 1448 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, |
1734 | struct ib_mr **mrp, struct ib_sge *iov) | 1449 | struct ib_mr **mrp, struct ib_sge *iov) |
@@ -1854,287 +1569,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
1854 | } | 1569 | } |
1855 | 1570 | ||
1856 | /* | 1571 | /* |
1857 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
1858 | */ | ||
1859 | |||
1860 | static void | ||
1861 | rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) | ||
1862 | { | ||
1863 | seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
1864 | seg->mr_dmalen = seg->mr_len; | ||
1865 | if (seg->mr_page) | ||
1866 | seg->mr_dma = ib_dma_map_page(ia->ri_id->device, | ||
1867 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
1868 | seg->mr_dmalen, seg->mr_dir); | ||
1869 | else | ||
1870 | seg->mr_dma = ib_dma_map_single(ia->ri_id->device, | ||
1871 | seg->mr_offset, | ||
1872 | seg->mr_dmalen, seg->mr_dir); | ||
1873 | if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { | ||
1874 | dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", | ||
1875 | __func__, | ||
1876 | (unsigned long long)seg->mr_dma, | ||
1877 | seg->mr_offset, seg->mr_dmalen); | ||
1878 | } | ||
1879 | } | ||
1880 | |||
1881 | static void | ||
1882 | rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | ||
1883 | { | ||
1884 | if (seg->mr_page) | ||
1885 | ib_dma_unmap_page(ia->ri_id->device, | ||
1886 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1887 | else | ||
1888 | ib_dma_unmap_single(ia->ri_id->device, | ||
1889 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1890 | } | ||
1891 | |||
1892 | static int | ||
1893 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1894 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1895 | struct rpcrdma_xprt *r_xprt) | ||
1896 | { | ||
1897 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1898 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
1899 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
1900 | struct ib_mr *mr = frmr->fr_mr; | ||
1901 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
1902 | u8 key; | ||
1903 | int len, pageoff; | ||
1904 | int i, rc; | ||
1905 | int seg_len; | ||
1906 | u64 pa; | ||
1907 | int page_no; | ||
1908 | |||
1909 | pageoff = offset_in_page(seg1->mr_offset); | ||
1910 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1911 | seg1->mr_len += pageoff; | ||
1912 | len = -pageoff; | ||
1913 | if (*nsegs > ia->ri_max_frmr_depth) | ||
1914 | *nsegs = ia->ri_max_frmr_depth; | ||
1915 | for (page_no = i = 0; i < *nsegs;) { | ||
1916 | rpcrdma_map_one(ia, seg, writing); | ||
1917 | pa = seg->mr_dma; | ||
1918 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
1919 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
1920 | pa += PAGE_SIZE; | ||
1921 | } | ||
1922 | len += seg->mr_len; | ||
1923 | ++seg; | ||
1924 | ++i; | ||
1925 | /* Check for holes */ | ||
1926 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1927 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1928 | break; | ||
1929 | } | ||
1930 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1931 | __func__, mw, i); | ||
1932 | |||
1933 | frmr->fr_state = FRMR_IS_VALID; | ||
1934 | |||
1935 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
1936 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
1937 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
1938 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; | ||
1939 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
1940 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
1941 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1942 | fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | ||
1943 | if (fastreg_wr.wr.fast_reg.length < len) { | ||
1944 | rc = -EIO; | ||
1945 | goto out_err; | ||
1946 | } | ||
1947 | |||
1948 | /* Bump the key */ | ||
1949 | key = (u8)(mr->rkey & 0x000000FF); | ||
1950 | ib_update_fast_reg_key(mr, ++key); | ||
1951 | |||
1952 | fastreg_wr.wr.fast_reg.access_flags = (writing ? | ||
1953 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
1954 | IB_ACCESS_REMOTE_READ); | ||
1955 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
1956 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1957 | |||
1958 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
1959 | if (rc) { | ||
1960 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1961 | " status %i\n", __func__, rc); | ||
1962 | ib_update_fast_reg_key(mr, --key); | ||
1963 | goto out_err; | ||
1964 | } else { | ||
1965 | seg1->mr_rkey = mr->rkey; | ||
1966 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1967 | seg1->mr_nsegs = i; | ||
1968 | seg1->mr_len = len; | ||
1969 | } | ||
1970 | *nsegs = i; | ||
1971 | return 0; | ||
1972 | out_err: | ||
1973 | frmr->fr_state = FRMR_IS_INVALID; | ||
1974 | while (i--) | ||
1975 | rpcrdma_unmap_one(ia, --seg); | ||
1976 | return rc; | ||
1977 | } | ||
1978 | |||
1979 | static int | ||
1980 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1981 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1982 | { | ||
1983 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1984 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1985 | int rc; | ||
1986 | |||
1987 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1988 | |||
1989 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1990 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
1991 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1992 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
1993 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1994 | |||
1995 | read_lock(&ia->ri_qplock); | ||
1996 | while (seg1->mr_nsegs--) | ||
1997 | rpcrdma_unmap_one(ia, seg++); | ||
1998 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1999 | read_unlock(&ia->ri_qplock); | ||
2000 | if (rc) { | ||
2001 | /* Force rpcrdma_buffer_get() to retry */ | ||
2002 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
2003 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
2004 | " status %i\n", __func__, rc); | ||
2005 | } | ||
2006 | return rc; | ||
2007 | } | ||
2008 | |||
2009 | static int | ||
2010 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2011 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
2012 | { | ||
2013 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2014 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
2015 | int len, pageoff, i, rc; | ||
2016 | |||
2017 | pageoff = offset_in_page(seg1->mr_offset); | ||
2018 | seg1->mr_offset -= pageoff; /* start of page */ | ||
2019 | seg1->mr_len += pageoff; | ||
2020 | len = -pageoff; | ||
2021 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
2022 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
2023 | for (i = 0; i < *nsegs;) { | ||
2024 | rpcrdma_map_one(ia, seg, writing); | ||
2025 | physaddrs[i] = seg->mr_dma; | ||
2026 | len += seg->mr_len; | ||
2027 | ++seg; | ||
2028 | ++i; | ||
2029 | /* Check for holes */ | ||
2030 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
2031 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
2032 | break; | ||
2033 | } | ||
2034 | rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
2035 | if (rc) { | ||
2036 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
2037 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
2038 | len, (unsigned long long)seg1->mr_dma, | ||
2039 | pageoff, i, rc); | ||
2040 | while (i--) | ||
2041 | rpcrdma_unmap_one(ia, --seg); | ||
2042 | } else { | ||
2043 | seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; | ||
2044 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
2045 | seg1->mr_nsegs = i; | ||
2046 | seg1->mr_len = len; | ||
2047 | } | ||
2048 | *nsegs = i; | ||
2049 | return rc; | ||
2050 | } | ||
2051 | |||
2052 | static int | ||
2053 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2054 | struct rpcrdma_ia *ia) | ||
2055 | { | ||
2056 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2057 | LIST_HEAD(l); | ||
2058 | int rc; | ||
2059 | |||
2060 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
2061 | rc = ib_unmap_fmr(&l); | ||
2062 | read_lock(&ia->ri_qplock); | ||
2063 | while (seg1->mr_nsegs--) | ||
2064 | rpcrdma_unmap_one(ia, seg++); | ||
2065 | read_unlock(&ia->ri_qplock); | ||
2066 | if (rc) | ||
2067 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
2068 | " status %i\n", __func__, rc); | ||
2069 | return rc; | ||
2070 | } | ||
2071 | |||
2072 | int | ||
2073 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | ||
2074 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | ||
2075 | { | ||
2076 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2077 | int rc = 0; | ||
2078 | |||
2079 | switch (ia->ri_memreg_strategy) { | ||
2080 | |||
2081 | case RPCRDMA_ALLPHYSICAL: | ||
2082 | rpcrdma_map_one(ia, seg, writing); | ||
2083 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
2084 | seg->mr_base = seg->mr_dma; | ||
2085 | seg->mr_nsegs = 1; | ||
2086 | nsegs = 1; | ||
2087 | break; | ||
2088 | |||
2089 | /* Registration using frmr registration */ | ||
2090 | case RPCRDMA_FRMR: | ||
2091 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
2092 | break; | ||
2093 | |||
2094 | /* Registration using fmr memory registration */ | ||
2095 | case RPCRDMA_MTHCAFMR: | ||
2096 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | ||
2097 | break; | ||
2098 | |||
2099 | default: | ||
2100 | return -EIO; | ||
2101 | } | ||
2102 | if (rc) | ||
2103 | return rc; | ||
2104 | |||
2105 | return nsegs; | ||
2106 | } | ||
2107 | |||
2108 | int | ||
2109 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | ||
2110 | struct rpcrdma_xprt *r_xprt) | ||
2111 | { | ||
2112 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2113 | int nsegs = seg->mr_nsegs, rc; | ||
2114 | |||
2115 | switch (ia->ri_memreg_strategy) { | ||
2116 | |||
2117 | case RPCRDMA_ALLPHYSICAL: | ||
2118 | read_lock(&ia->ri_qplock); | ||
2119 | rpcrdma_unmap_one(ia, seg); | ||
2120 | read_unlock(&ia->ri_qplock); | ||
2121 | break; | ||
2122 | |||
2123 | case RPCRDMA_FRMR: | ||
2124 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
2125 | break; | ||
2126 | |||
2127 | case RPCRDMA_MTHCAFMR: | ||
2128 | rc = rpcrdma_deregister_fmr_external(seg, ia); | ||
2129 | break; | ||
2130 | |||
2131 | default: | ||
2132 | break; | ||
2133 | } | ||
2134 | return nsegs; | ||
2135 | } | ||
2136 | |||
2137 | /* | ||
2138 | * Prepost any receive buffer, then post send. | 1572 | * Prepost any receive buffer, then post send. |
2139 | * | 1573 | * |
2140 | * Receive buffer is donated to hardware, reclaimed upon recv completion. | 1574 | * Receive buffer is donated to hardware, reclaimed upon recv completion. |
@@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
2156 | } | 1590 | } |
2157 | 1591 | ||
2158 | send_wr.next = NULL; | 1592 | send_wr.next = NULL; |
2159 | send_wr.wr_id = 0ULL; /* no send cookie */ | 1593 | send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; |
2160 | send_wr.sg_list = req->rl_send_iov; | 1594 | send_wr.sg_list = req->rl_send_iov; |
2161 | send_wr.num_sge = req->rl_niovs; | 1595 | send_wr.num_sge = req->rl_niovs; |
2162 | send_wr.opcode = IB_WR_SEND; | 1596 | send_wr.opcode = IB_WR_SEND; |
@@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
2215 | return rc; | 1649 | return rc; |
2216 | } | 1650 | } |
2217 | 1651 | ||
2218 | /* Physical mapping means one Read/Write list entry per-page. | 1652 | /* How many chunk list items fit within our inline buffers? |
2219 | * All list entries must fit within an inline buffer | ||
2220 | * | ||
2221 | * NB: The server must return a Write list for NFS READ, | ||
2222 | * which has the same constraint. Factor in the inline | ||
2223 | * rsize as well. | ||
2224 | */ | 1653 | */ |
2225 | static size_t | 1654 | unsigned int |
2226 | rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) | 1655 | rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) |
2227 | { | 1656 | { |
2228 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1657 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
2229 | unsigned int inline_size, pages; | 1658 | int bytes, segments; |
2230 | 1659 | ||
2231 | inline_size = min_t(unsigned int, | 1660 | bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); |
2232 | cdata->inline_wsize, cdata->inline_rsize); | 1661 | bytes -= RPCRDMA_HDRLEN_MIN; |
2233 | inline_size -= RPCRDMA_HDRLEN_MIN; | 1662 | if (bytes < sizeof(struct rpcrdma_segment) * 2) { |
2234 | pages = inline_size / sizeof(struct rpcrdma_segment); | 1663 | pr_warn("RPC: %s: inline threshold too small\n", |
2235 | return pages << PAGE_SHIFT; | 1664 | __func__); |
2236 | } | 1665 | return 0; |
2237 | |||
2238 | static size_t | ||
2239 | rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2240 | { | ||
2241 | return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; | ||
2242 | } | ||
2243 | |||
2244 | size_t | ||
2245 | rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2246 | { | ||
2247 | size_t result; | ||
2248 | |||
2249 | switch (r_xprt->rx_ia.ri_memreg_strategy) { | ||
2250 | case RPCRDMA_ALLPHYSICAL: | ||
2251 | result = rpcrdma_physical_max_payload(r_xprt); | ||
2252 | break; | ||
2253 | default: | ||
2254 | result = rpcrdma_mr_max_payload(r_xprt); | ||
2255 | } | 1666 | } |
2256 | return result; | 1667 | |
1668 | segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); | ||
1669 | dprintk("RPC: %s: max chunk list size = %d segments\n", | ||
1670 | __func__, segments); | ||
1671 | return segments; | ||
2257 | } | 1672 | } |
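
[Editor's note] rpcrdma_max_segments() rounds the per-buffer segment count down to a power of two via fls(). A userspace check of that computation, with the inline sizes and segment size assumed:

#include <stdio.h>

#define HDRLEN_MIN 28		/* assumption: RPCRDMA_HDRLEN_MIN */
#define SEG_SIZE   16		/* assumption: sizeof(struct rpcrdma_segment) */

static int fls32(unsigned int v)	/* userspace stand-in for fls() */
{
	return v ? 32 - __builtin_clz(v) : 0;
}

static unsigned int max_segments(unsigned int wsize, unsigned int rsize)
{
	unsigned int bytes = (wsize < rsize ? wsize : rsize) - HDRLEN_MIN;

	if (bytes < SEG_SIZE * 2)
		return 0;	/* inline threshold too small */
	/* largest power of two not exceeding bytes / SEG_SIZE */
	return 1u << (fls32(bytes / SEG_SIZE) - 1);
}

int main(void)
{
	/* 1024-byte inline buffers: (1024 - 28) / 16 = 62 -> 32 segments */
	printf("%u\n", max_segments(1024, 1024));
	return 0;
}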
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fb6f0885..78e0b8beaa36 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -60,6 +60,7 @@ | |||
60 | * Interface Adapter -- one per transport instance | 60 | * Interface Adapter -- one per transport instance |
61 | */ | 61 | */ |
62 | struct rpcrdma_ia { | 62 | struct rpcrdma_ia { |
63 | const struct rpcrdma_memreg_ops *ri_ops; | ||
63 | rwlock_t ri_qplock; | 64 | rwlock_t ri_qplock; |
64 | struct rdma_cm_id *ri_id; | 65 | struct rdma_cm_id *ri_id; |
65 | struct ib_pd *ri_pd; | 66 | struct ib_pd *ri_pd; |
@@ -105,6 +106,10 @@ struct rpcrdma_ep { | |||
105 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 106 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
106 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | 107 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) |
107 | 108 | ||
109 | /* Force completion handler to ignore the signal | ||
110 | */ | ||
111 | #define RPCRDMA_IGNORE_COMPLETION (0ULL) | ||
112 | |||
108 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV | 113 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
109 | * | 114 | * |
110 | * The below structure appears at the front of a large region of kmalloc'd | 115 | * The below structure appears at the front of a large region of kmalloc'd |
@@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
143 | return (struct rpcrdma_msg *)rb->rg_base; | 148 | return (struct rpcrdma_msg *)rb->rg_base; |
144 | } | 149 | } |
145 | 150 | ||
146 | enum rpcrdma_chunktype { | ||
147 | rpcrdma_noch = 0, | ||
148 | rpcrdma_readch, | ||
149 | rpcrdma_areadch, | ||
150 | rpcrdma_writech, | ||
151 | rpcrdma_replych | ||
152 | }; | ||
153 | |||
154 | /* | 151 | /* |
155 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 152 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
156 | * and complete a reply, asynchronously. It needs several pieces of | 153 |
@@ -213,6 +210,7 @@ struct rpcrdma_mw { | |||
213 | struct ib_fmr *fmr; | 210 | struct ib_fmr *fmr; |
214 | struct rpcrdma_frmr frmr; | 211 | struct rpcrdma_frmr frmr; |
215 | } r; | 212 | } r; |
213 | void (*mw_sendcompletion)(struct ib_wc *); | ||
216 | struct list_head mw_list; | 214 | struct list_head mw_list; |
217 | struct list_head mw_all; | 215 | struct list_head mw_all; |
218 | }; | 216 | }; |
@@ -258,7 +256,6 @@ struct rpcrdma_req { | |||
258 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 256 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
259 | unsigned int rl_nchunks; /* non-zero if chunks */ | 257 | unsigned int rl_nchunks; /* non-zero if chunks */ |
260 | unsigned int rl_connect_cookie; /* retry detection */ | 258 | unsigned int rl_connect_cookie; /* retry detection */ |
261 | enum rpcrdma_chunktype rl_rtype, rl_wtype; | ||
262 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 259 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
263 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 260 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
264 | struct ib_sge rl_send_iov[4]; /* for active requests */ | 261 | struct ib_sge rl_send_iov[4]; /* for active requests */ |
@@ -340,6 +337,29 @@ struct rpcrdma_stats { | |||
340 | }; | 337 | }; |
341 | 338 | ||
342 | /* | 339 | /* |
340 | * Per-registration mode operations | ||
341 | */ | ||
342 | struct rpcrdma_xprt; | ||
343 | struct rpcrdma_memreg_ops { | ||
344 | int (*ro_map)(struct rpcrdma_xprt *, | ||
345 | struct rpcrdma_mr_seg *, int, bool); | ||
346 | int (*ro_unmap)(struct rpcrdma_xprt *, | ||
347 | struct rpcrdma_mr_seg *); | ||
348 | int (*ro_open)(struct rpcrdma_ia *, | ||
349 | struct rpcrdma_ep *, | ||
350 | struct rpcrdma_create_data_internal *); | ||
351 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | ||
352 | int (*ro_init)(struct rpcrdma_xprt *); | ||
353 | void (*ro_reset)(struct rpcrdma_xprt *); | ||
354 | void (*ro_destroy)(struct rpcrdma_buffer *); | ||
355 | const char *ro_displayname; | ||
356 | }; | ||
357 | |||
358 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | ||
359 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | ||
360 | extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; | ||
361 | |||
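
[Editor's note] Each registration mode fills in one of these tables. A minimal sketch of how such an instance is wired up with designated initializers (stub callbacks, hypothetical names):

#include <stdio.h>

struct xprt;			/* opaque, illustrative */

struct memreg_ops {		/* trimmed rpcrdma_memreg_ops analog */
	int (*ro_init)(struct xprt *);
	void (*ro_reset)(struct xprt *);
	const char *ro_displayname;
};

static int demo_init(struct xprt *x)   { (void)x; return 0; }
static void demo_reset(struct xprt *x) { (void)x; }

/* Designated initializers leave unimplemented hooks NULL. */
static const struct memreg_ops demo_ops = {
	.ro_init	= demo_init,
	.ro_reset	= demo_reset,
	.ro_displayname	= "demo",
};

int main(void)
{
	printf("ops '%s', init -> %d\n",
	       demo_ops.ro_displayname, demo_ops.ro_init(NULL));
	return 0;
}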
362 | /* | ||
343 | * RPCRDMA transport -- encapsulates the structures above for | 363 | * RPCRDMA transport -- encapsulates the structures above for |
344 | * integration with RPC. | 364 | * integration with RPC. |
345 | * | 365 | * |
@@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); | |||
398 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 418 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
399 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 419 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
400 | 420 | ||
401 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | ||
402 | int, int, struct rpcrdma_xprt *); | ||
403 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | ||
404 | struct rpcrdma_xprt *); | ||
405 | |||
406 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | 421 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, |
407 | size_t, gfp_t); | 422 | size_t, gfp_t); |
408 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 423 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
409 | struct rpcrdma_regbuf *); | 424 | struct rpcrdma_regbuf *); |
410 | 425 | ||
426 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | ||
427 | |||
428 | /* | ||
429 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
430 | */ | ||
431 | |||
432 | void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); | ||
433 | |||
434 | static inline enum dma_data_direction | ||
435 | rpcrdma_data_dir(bool writing) | ||
436 | { | ||
437 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
438 | } | ||
439 | |||
440 | static inline void | ||
441 | rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, | ||
442 | enum dma_data_direction direction) | ||
443 | { | ||
444 | seg->mr_dir = direction; | ||
445 | seg->mr_dmalen = seg->mr_len; | ||
446 | |||
447 | if (seg->mr_page) | ||
448 | seg->mr_dma = ib_dma_map_page(device, | ||
449 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
450 | seg->mr_dmalen, seg->mr_dir); | ||
451 | else | ||
452 | seg->mr_dma = ib_dma_map_single(device, | ||
453 | seg->mr_offset, | ||
454 | seg->mr_dmalen, seg->mr_dir); | ||
455 | |||
456 | if (ib_dma_mapping_error(device, seg->mr_dma)) | ||
457 | rpcrdma_mapping_error(seg); | ||
458 | } | ||
459 | |||
460 | static inline void | ||
461 | rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) | ||
462 | { | ||
463 | if (seg->mr_page) | ||
464 | ib_dma_unmap_page(device, | ||
465 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
466 | else | ||
467 | ib_dma_unmap_single(device, | ||
468 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
469 | } | ||
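
[Editor's note] The new inline helpers derive the DMA direction from the I/O direction and pick page or single mappings per segment. A userspace sketch of just the direction rule:

#include <stdio.h>
#include <stdbool.h>

enum dma_dir { DMA_TO_DEVICE, DMA_FROM_DEVICE };

/* Received data (a write chunk) maps FROM the device; a read
 * chunk payload is sent TO the device. */
static enum dma_dir data_dir(bool writing)
{
	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}

int main(void)
{
	printf("writing: %s\n", data_dir(true) == DMA_FROM_DEVICE ?
	       "FROM_DEVICE" : "TO_DEVICE");
	printf("reading: %s\n", data_dir(false) == DMA_TO_DEVICE ?
	       "TO_DEVICE" : "FROM_DEVICE");
	return 0;
}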
470 | |||
411 | /* | 471 | /* |
412 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 472 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
413 | */ | 473 | */ |
@@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
418 | /* | 478 | /* |
419 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 479 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
420 | */ | 480 | */ |
421 | ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); | ||
422 | int rpcrdma_marshal_req(struct rpc_rqst *); | 481 | int rpcrdma_marshal_req(struct rpc_rqst *); |
423 | size_t rpcrdma_max_payload(struct rpcrdma_xprt *); | ||
424 | 482 | ||
425 | /* Temporary NFS request map cache. Created in svc_rdma.c */ | 483 | /* Temporary NFS request map cache. Created in svc_rdma.c */ |
426 | extern struct kmem_cache *svc_rdma_map_cachep; | 484 | extern struct kmem_cache *svc_rdma_map_cachep; |
diff --git a/net/tipc/link.c b/net/tipc/link.c index a4cf364316de..14f09b3cb87c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c | |||
@@ -464,10 +464,11 @@ void tipc_link_reset(struct tipc_link *l_ptr) | |||
464 | /* Clean up all queues, except inputq: */ | 464 | /* Clean up all queues, except inputq: */ |
465 | __skb_queue_purge(&l_ptr->outqueue); | 465 | __skb_queue_purge(&l_ptr->outqueue); |
466 | __skb_queue_purge(&l_ptr->deferred_queue); | 466 | __skb_queue_purge(&l_ptr->deferred_queue); |
467 | skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq); | 467 | if (!owner->inputq) |
468 | if (!skb_queue_empty(&l_ptr->inputq)) | 468 | owner->inputq = &l_ptr->inputq; |
469 | skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); | ||
470 | if (!skb_queue_empty(owner->inputq)) | ||
469 | owner->action_flags |= TIPC_MSG_EVT; | 471 | owner->action_flags |= TIPC_MSG_EVT; |
470 | owner->inputq = &l_ptr->inputq; | ||
471 | l_ptr->next_out = NULL; | 472 | l_ptr->next_out = NULL; |
472 | l_ptr->unacked_window = 0; | 473 | l_ptr->unacked_window = 0; |
473 | l_ptr->checkpoint = 1; | 474 | l_ptr->checkpoint = 1; |
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index be2501538011..b6f84f6a2a09 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c | |||
@@ -4400,6 +4400,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) | |||
4400 | if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) | 4400 | if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) |
4401 | return -EINVAL; | 4401 | return -EINVAL; |
4402 | 4402 | ||
4403 | /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT | ||
4404 | * as userspace might just pass through the capabilities from the IEs | ||
4405 | * directly, rather than enforcing this restriction and returning an | ||
4406 | * error in this case. | ||
4407 | */ | ||
4408 | if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) { | ||
4409 | params.ht_capa = NULL; | ||
4410 | params.vht_capa = NULL; | ||
4411 | } | ||
4412 | |||
4403 | /* When you run into this, adjust the code below for the new flag */ | 4413 | /* When you run into this, adjust the code below for the new flag */ |
4404 | BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); | 4414 | BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); |
4405 | 4415 | ||
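
[Editor's note] The nl80211 change silently drops dependent capabilities rather than rejecting the request when the prerequisite flag is absent. A tiny sketch of the pattern, with hypothetical flag names:

#include <stdio.h>
#include <stddef.h>

#define FLAG_WME (1u << 0)	/* hypothetical stand-in */

struct sta_params {
	unsigned int flags_set;
	const void *ht_capa;	/* capabilities that depend on WME */
	const void *vht_capa;
};

/* Ignore HT/VHT rather than returning an error to userspace. */
static void sanitize(struct sta_params *p)
{
	if (!(p->flags_set & FLAG_WME)) {
		p->ht_capa = NULL;
		p->vht_capa = NULL;
	}
}

int main(void)
{
	int dummy;
	struct sta_params p = { 0, &dummy, &dummy };

	sanitize(&p);
	printf("ht=%p vht=%p\n", p.ht_capa, p.vht_capa);  /* both nil */
	return 0;
}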
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cee479bc655c..638af0655aaf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -2269,11 +2269,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, | |||
2269 | * have the xfrm_state's. We need to wait for KM to | 2269 | * have the xfrm_state's. We need to wait for KM to |
2270 | * negotiate new SA's or bail out with error.*/ | 2270 | * negotiate new SA's or bail out with error.*/ |
2271 | if (net->xfrm.sysctl_larval_drop) { | 2271 | if (net->xfrm.sysctl_larval_drop) { |
2272 | dst_release(dst); | ||
2273 | xfrm_pols_put(pols, drop_pols); | ||
2274 | XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); | 2272 | XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); |
2275 | 2273 | err = -EREMOTE; | |
2276 | return ERR_PTR(-EREMOTE); | 2274 | goto error; |
2277 | } | 2275 | } |
2278 | 2276 | ||
2279 | err = -EAGAIN; | 2277 | err = -EAGAIN; |
@@ -2324,7 +2322,8 @@ nopol: | |||
2324 | error: | 2322 | error: |
2325 | dst_release(dst); | 2323 | dst_release(dst); |
2326 | dropdst: | 2324 | dropdst: |
2327 | dst_release(dst_orig); | 2325 | if (!(flags & XFRM_LOOKUP_KEEP_DST_REF)) |
2326 | dst_release(dst_orig); | ||
2328 | xfrm_pols_put(pols, drop_pols); | 2327 | xfrm_pols_put(pols, drop_pols); |
2329 | return ERR_PTR(err); | 2328 | return ERR_PTR(err); |
2330 | } | 2329 | } |
@@ -2338,7 +2337,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, | |||
2338 | struct sock *sk, int flags) | 2337 | struct sock *sk, int flags) |
2339 | { | 2338 | { |
2340 | struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, | 2339 | struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, |
2341 | flags | XFRM_LOOKUP_QUEUE); | 2340 | flags | XFRM_LOOKUP_QUEUE | |
2341 | XFRM_LOOKUP_KEEP_DST_REF); | ||
2342 | 2342 | ||
2343 | if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) | 2343 | if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) |
2344 | return make_blackhole(net, dst_orig->ops->family, dst_orig); | 2344 | return make_blackhole(net, dst_orig->ops->family, dst_orig); |
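
[Editor's note] The xfrm fix routes the larval-drop exit through the common error path and makes the dst_orig release conditional on a new flag, so xfrm_lookup_route() can keep its caller's reference alive for make_blackhole(). A hedged sketch of the flag-guarded release:

#include <stdio.h>
#include <stddef.h>

#define LOOKUP_QUEUE	    (1 << 0)	/* illustrative flag bits */
#define LOOKUP_KEEP_DST_REF (1 << 1)

struct dst { int refcnt; };

static void dst_release(struct dst *d) { d->refcnt--; }

/* Error path: drop our own ref when we hold one, and the
 * caller's ref only when it did not ask us to keep it. */
static void error_path(struct dst *dst, struct dst *dst_orig, int flags)
{
	if (dst)
		dst_release(dst);
	if (!(flags & LOOKUP_KEEP_DST_REF))
		dst_release(dst_orig);
}

int main(void)
{
	struct dst orig = { .refcnt = 1 };

	error_path(NULL, &orig, LOOKUP_QUEUE | LOOKUP_KEEP_DST_REF);
	printf("orig refcnt after error: %d\n", orig.refcnt);  /* still 1 */
	return 0;
}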