aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c27
-rw-r--r--net/ipv4/ip_gre.c46
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c4
-rw-r--r--net/ipv4/tcp_dctcp.c2
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv6/fib6_rules.c19
-rw-r--r--net/ipv6/ip6_fib.c12
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/netfilter/Kconfig1
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/xfrm6_output.c18
-rw-r--r--net/ipv6/xfrm6_policy.c6
-rw-r--r--net/irda/irlmp.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/netfilter/core.c2
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c2
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/openvswitch/actions.c9
-rw-r--r--net/openvswitch/conntrack.c24
-rw-r--r--net/openvswitch/conntrack.h16
-rw-r--r--net/openvswitch/datapath.c5
-rw-r--r--net/openvswitch/datapath.h1
-rw-r--r--net/openvswitch/flow_netlink.c26
-rw-r--r--net/openvswitch/flow_netlink.h6
-rw-r--r--net/openvswitch/vport-geneve.c13
-rw-r--r--net/openvswitch/vport-gre.c8
-rw-r--r--net/openvswitch/vport-internal_dev.c46
-rw-r--r--net/openvswitch/vport-vxlan.c20
-rw-r--r--net/openvswitch/vport.c58
-rw-r--r--net/openvswitch/vport.h16
-rw-r--r--net/sysctl_net.c6
-rw-r--r--net/tipc/bcast.c8
-rw-r--r--net/tipc/msg.c12
-rw-r--r--net/tipc/udp_media.c5
-rw-r--r--net/vmw_vsock/af_vsock.c7
-rw-r--r--net/vmw_vsock/vmci_transport.c173
-rw-r--r--net/vmw_vsock/vmci_transport.h4
-rw-r--r--net/xfrm/xfrm_user.c4
40 files changed, 329 insertions, 302 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 1225b4be8ed6..13f49f81ae13 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
99#include <linux/rtnetlink.h> 99#include <linux/rtnetlink.h>
100#include <linux/stat.h> 100#include <linux/stat.h>
101#include <net/dst.h> 101#include <net/dst.h>
102#include <net/dst_metadata.h>
102#include <net/pkt_sched.h> 103#include <net/pkt_sched.h>
103#include <net/checksum.h> 104#include <net/checksum.h>
104#include <net/xfrm.h> 105#include <net/xfrm.h>
@@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev)
682EXPORT_SYMBOL(dev_get_iflink); 683EXPORT_SYMBOL(dev_get_iflink);
683 684
684/** 685/**
686 * dev_fill_metadata_dst - Retrieve tunnel egress information.
687 * @dev: targeted interface
688 * @skb: The packet.
689 *
690 * For better visibility of tunnel traffic OVS needs to retrieve
691 * egress tunnel information for a packet. Following API allows
692 * user to get this info.
693 */
694int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
695{
696 struct ip_tunnel_info *info;
697
698 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
699 return -EINVAL;
700
701 info = skb_tunnel_info_unclone(skb);
702 if (!info)
703 return -ENOMEM;
704 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
705 return -EINVAL;
706
707 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
708}
709EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
710
711/**
685 * __dev_get_by_name - find a device by its name 712 * __dev_get_by_name - find a device by its name
686 * @net: the applicable net namespace 713 * @net: the applicable net namespace
687 * @name: name to find 714 * @name: name to find
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bd0679d90519..614521437e30 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
498 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 498 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
499} 499}
500 500
501static struct rtable *gre_get_rt(struct sk_buff *skb,
502 struct net_device *dev,
503 struct flowi4 *fl,
504 const struct ip_tunnel_key *key)
505{
506 struct net *net = dev_net(dev);
507
508 memset(fl, 0, sizeof(*fl));
509 fl->daddr = key->u.ipv4.dst;
510 fl->saddr = key->u.ipv4.src;
511 fl->flowi4_tos = RT_TOS(key->tos);
512 fl->flowi4_mark = skb->mark;
513 fl->flowi4_proto = IPPROTO_GRE;
514
515 return ip_route_output_key(net, fl);
516}
517
501static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) 518static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
502{ 519{
503 struct ip_tunnel_info *tun_info; 520 struct ip_tunnel_info *tun_info;
504 struct net *net = dev_net(dev);
505 const struct ip_tunnel_key *key; 521 const struct ip_tunnel_key *key;
506 struct flowi4 fl; 522 struct flowi4 fl;
507 struct rtable *rt; 523 struct rtable *rt;
@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
516 goto err_free_skb; 532 goto err_free_skb;
517 533
518 key = &tun_info->key; 534 key = &tun_info->key;
519 memset(&fl, 0, sizeof(fl)); 535 rt = gre_get_rt(skb, dev, &fl, key);
520 fl.daddr = key->u.ipv4.dst;
521 fl.saddr = key->u.ipv4.src;
522 fl.flowi4_tos = RT_TOS(key->tos);
523 fl.flowi4_mark = skb->mark;
524 fl.flowi4_proto = IPPROTO_GRE;
525
526 rt = ip_route_output_key(net, &fl);
527 if (IS_ERR(rt)) 536 if (IS_ERR(rt))
528 goto err_free_skb; 537 goto err_free_skb;
529 538
@@ -566,6 +575,24 @@ err_free_skb:
566 dev->stats.tx_dropped++; 575 dev->stats.tx_dropped++;
567} 576}
568 577
578static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
579{
580 struct ip_tunnel_info *info = skb_tunnel_info(skb);
581 struct rtable *rt;
582 struct flowi4 fl4;
583
584 if (ip_tunnel_info_af(info) != AF_INET)
585 return -EINVAL;
586
587 rt = gre_get_rt(skb, dev, &fl4, &info->key);
588 if (IS_ERR(rt))
589 return PTR_ERR(rt);
590
591 ip_rt_put(rt);
592 info->key.u.ipv4.src = fl4.saddr;
593 return 0;
594}
595
569static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 596static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
570 struct net_device *dev) 597 struct net_device *dev)
571{ 598{
@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
1023 .ndo_change_mtu = ip_tunnel_change_mtu, 1050 .ndo_change_mtu = ip_tunnel_change_mtu,
1024 .ndo_get_stats64 = ip_tunnel_get_stats64, 1051 .ndo_get_stats64 = ip_tunnel_get_stats64,
1025 .ndo_get_iflink = ip_tunnel_get_iflink, 1052 .ndo_get_iflink = ip_tunnel_get_iflink,
1053 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
1026}; 1054};
1027 1055
1028static void ipgre_tap_setup(struct net_device *dev) 1056static void ipgre_tap_setup(struct net_device *dev)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 690d27d3f2f9..a35584176535 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -75,6 +75,7 @@ endif # NF_TABLES
75 75
76config NF_DUP_IPV4 76config NF_DUP_IPV4
77 tristate "Netfilter IPv4 packet duplication to alternate destination" 77 tristate "Netfilter IPv4 packet duplication to alternate destination"
78 depends on !NF_CONNTRACK || NF_CONNTRACK
78 help 79 help
79 This option enables the nf_dup_ipv4 core, which duplicates an IPv4 80 This option enables the nf_dup_ipv4 core, which duplicates an IPv4
80 packet to be rerouted to another destination. 81 packet to be rerouted to another destination.
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 74dd6671b66d..78cc64eddfc1 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -60,9 +60,7 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
60 if (FIB_RES_DEV(res) == dev) 60 if (FIB_RES_DEV(res) == dev)
61 dev_match = true; 61 dev_match = true;
62#endif 62#endif
63 if (dev_match || flags & XT_RPFILTER_LOOSE) 63 return dev_match || flags & XT_RPFILTER_LOOSE;
64 return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
65 return dev_match;
66} 64}
67 65
68static bool rpfilter_is_local(const struct sk_buff *skb) 66static bool rpfilter_is_local(const struct sk_buff *skb)
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 7092a61c4dc8..7e538f71f5fb 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
209 209
210 /* alpha = (1 - g) * alpha + g * F */ 210 /* alpha = (1 - g) * alpha + g * F */
211 211
212 alpha -= alpha >> dctcp_shift_g; 212 alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
213 if (bytes_ecn) { 213 if (bytes_ecn) {
214 /* If dctcp_shift_g == 1, a 32bit value would overflow 214 /* If dctcp_shift_g == 1, a 32bit value would overflow
215 * after 8 Mbytes. 215 * after 8 Mbytes.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f6f7f9b4901b..f4f9793eb025 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3410,7 +3410,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3410 */ 3410 */
3411 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); 3411 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3412 skb_mstamp_get(&skb->skb_mstamp); 3412 skb_mstamp_get(&skb->skb_mstamp);
3413 NET_INC_STATS_BH(sock_net(sk), mib); 3413 NET_INC_STATS(sock_net(sk), mib);
3414 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 3414 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3415} 3415}
3416 3416
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 9f298d0dc9a1..7ee6518afa86 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
30 30
31 mtu = dst_mtu(skb_dst(skb)); 31 mtu = dst_mtu(skb_dst(skb));
32 if (skb->len > mtu) { 32 if (skb->len > mtu) {
33 skb->protocol = htons(ETH_P_IP);
34
33 if (skb->sk) 35 if (skb->sk)
34 xfrm_local_error(skb, mtu); 36 xfrm_local_error(skb, mtu);
35 else 37 else
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9f777ec59a59..ed33abf57abd 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,6 +32,7 @@ struct fib6_rule {
32struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, 32struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
33 int flags, pol_lookup_t lookup) 33 int flags, pol_lookup_t lookup)
34{ 34{
35 struct rt6_info *rt;
35 struct fib_lookup_arg arg = { 36 struct fib_lookup_arg arg = {
36 .lookup_ptr = lookup, 37 .lookup_ptr = lookup,
37 .flags = FIB_LOOKUP_NOREF, 38 .flags = FIB_LOOKUP_NOREF,
@@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
40 fib_rules_lookup(net->ipv6.fib6_rules_ops, 41 fib_rules_lookup(net->ipv6.fib6_rules_ops,
41 flowi6_to_flowi(fl6), flags, &arg); 42 flowi6_to_flowi(fl6), flags, &arg);
42 43
43 if (arg.result) 44 rt = arg.result;
44 return arg.result;
45 45
46 dst_hold(&net->ipv6.ip6_null_entry->dst); 46 if (!rt) {
47 return &net->ipv6.ip6_null_entry->dst; 47 dst_hold(&net->ipv6.ip6_null_entry->dst);
48 return &net->ipv6.ip6_null_entry->dst;
49 }
50
51 if (rt->rt6i_flags & RTF_REJECT &&
52 rt->dst.error == -EAGAIN) {
53 ip6_rt_put(rt);
54 rt = net->ipv6.ip6_null_entry;
55 dst_hold(&rt->dst);
56 }
57
58 return &rt->dst;
48} 59}
49 60
50static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, 61static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 09fddf70cca4..0c7e276c230e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -286,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
286struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, 286struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
287 int flags, pol_lookup_t lookup) 287 int flags, pol_lookup_t lookup)
288{ 288{
289 return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); 289 struct rt6_info *rt;
290
291 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
292 if (rt->rt6i_flags & RTF_REJECT &&
293 rt->dst.error == -EAGAIN) {
294 ip6_rt_put(rt);
295 rt = net->ipv6.ip6_null_entry;
296 dst_hold(&rt->dst);
297 }
298
299 return &rt->dst;
290} 300}
291 301
292static void __net_init fib6_tables_init(struct net *net) 302static void __net_init fib6_tables_init(struct net *net)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0c89671e0767..c2650688aca7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -28,6 +28,7 @@
28 28
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/overflow-arith.h>
31#include <linux/string.h> 32#include <linux/string.h>
32#include <linux/socket.h> 33#include <linux/socket.h>
33#include <linux/net.h> 34#include <linux/net.h>
@@ -596,7 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
596 if (np->frag_size) 597 if (np->frag_size)
597 mtu = np->frag_size; 598 mtu = np->frag_size;
598 } 599 }
599 mtu -= hlen + sizeof(struct frag_hdr); 600
601 if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
602 mtu <= 7)
603 goto fail_toobig;
600 604
601 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, 605 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
602 &ipv6_hdr(skb)->saddr); 606 &ipv6_hdr(skb)->saddr);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 96833e4b3193..f6a024e141e5 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -58,6 +58,7 @@ endif # NF_TABLES
58 58
59config NF_DUP_IPV6 59config NF_DUP_IPV6
60 tristate "Netfilter IPv6 packet duplication to alternate destination" 60 tristate "Netfilter IPv6 packet duplication to alternate destination"
61 depends on !NF_CONNTRACK || NF_CONNTRACK
61 help 62 help
62 This option enables the nf_dup_ipv6 core, which duplicates an IPv6 63 This option enables the nf_dup_ipv6 core, which duplicates an IPv6
63 packet to be rerouted to another destination. 64 packet to be rerouted to another destination.
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d0619632723a..2701cb3d88e9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1171,6 +1171,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1171{ 1171{
1172 struct dst_entry *dst; 1172 struct dst_entry *dst;
1173 int flags = 0; 1173 int flags = 0;
1174 bool any_src;
1174 1175
1175 dst = l3mdev_rt6_dst_by_oif(net, fl6); 1176 dst = l3mdev_rt6_dst_by_oif(net, fl6);
1176 if (dst) 1177 if (dst)
@@ -1178,11 +1179,12 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1178 1179
1179 fl6->flowi6_iif = LOOPBACK_IFINDEX; 1180 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1180 1181
1182 any_src = ipv6_addr_any(&fl6->saddr);
1181 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 1183 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1182 fl6->flowi6_oif) 1184 (fl6->flowi6_oif && any_src))
1183 flags |= RT6_LOOKUP_F_IFACE; 1185 flags |= RT6_LOOKUP_F_IFACE;
1184 1186
1185 if (!ipv6_addr_any(&fl6->saddr)) 1187 if (!any_src)
1186 flags |= RT6_LOOKUP_F_HAS_SADDR; 1188 flags |= RT6_LOOKUP_F_HAS_SADDR;
1187 else if (sk) 1189 else if (sk)
1188 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 1190 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 9db067a11b52..4d09ce6fa90e 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
79 79
80 if (!skb->ignore_df && skb->len > mtu) { 80 if (!skb->ignore_df && skb->len > mtu) {
81 skb->dev = dst->dev; 81 skb->dev = dst->dev;
82 skb->protocol = htons(ETH_P_IPV6);
82 83
83 if (xfrm6_local_dontfrag(skb)) 84 if (xfrm6_local_dontfrag(skb))
84 xfrm6_local_rxpmtu(skb, mtu); 85 xfrm6_local_rxpmtu(skb, mtu);
@@ -143,6 +144,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
143 struct dst_entry *dst = skb_dst(skb); 144 struct dst_entry *dst = skb_dst(skb);
144 struct xfrm_state *x = dst->xfrm; 145 struct xfrm_state *x = dst->xfrm;
145 int mtu; 146 int mtu;
147 bool toobig;
146 148
147#ifdef CONFIG_NETFILTER 149#ifdef CONFIG_NETFILTER
148 if (!x) { 150 if (!x) {
@@ -151,25 +153,29 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
151 } 153 }
152#endif 154#endif
153 155
156 if (x->props.mode != XFRM_MODE_TUNNEL)
157 goto skip_frag;
158
154 if (skb->protocol == htons(ETH_P_IPV6)) 159 if (skb->protocol == htons(ETH_P_IPV6))
155 mtu = ip6_skb_dst_mtu(skb); 160 mtu = ip6_skb_dst_mtu(skb);
156 else 161 else
157 mtu = dst_mtu(skb_dst(skb)); 162 mtu = dst_mtu(skb_dst(skb));
158 163
159 if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { 164 toobig = skb->len > mtu && !skb_is_gso(skb);
165
166 if (toobig && xfrm6_local_dontfrag(skb)) {
160 xfrm6_local_rxpmtu(skb, mtu); 167 xfrm6_local_rxpmtu(skb, mtu);
161 return -EMSGSIZE; 168 return -EMSGSIZE;
162 } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { 169 } else if (!skb->ignore_df && toobig && skb->sk) {
163 xfrm_local_error(skb, mtu); 170 xfrm_local_error(skb, mtu);
164 return -EMSGSIZE; 171 return -EMSGSIZE;
165 } 172 }
166 173
167 if (x->props.mode == XFRM_MODE_TUNNEL && 174 if (toobig || dst_allfrag(skb_dst(skb)))
168 ((skb->len > mtu && !skb_is_gso(skb)) ||
169 dst_allfrag(skb_dst(skb)))) {
170 return ip6_fragment(net, sk, skb, 175 return ip6_fragment(net, sk, skb,
171 __xfrm6_output_finish); 176 __xfrm6_output_finish);
172 } 177
178skip_frag:
173 return x->outer_mode->afinfo->output_finish(sk, skb); 179 return x->outer_mode->afinfo->output_finish(sk, skb);
174} 180}
175 181
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 08c9c93f3527..2cc5840f943d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -177,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
177 return; 177 return;
178 178
179 case IPPROTO_ICMPV6: 179 case IPPROTO_ICMPV6:
180 if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { 180 if (!onlyproto && (nh + offset + 2 < skb->data ||
181 pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
181 u8 *icmp; 182 u8 *icmp;
182 183
183 nh = skb_network_header(skb); 184 nh = skb_network_header(skb);
@@ -191,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
191#if IS_ENABLED(CONFIG_IPV6_MIP6) 192#if IS_ENABLED(CONFIG_IPV6_MIP6)
192 case IPPROTO_MH: 193 case IPPROTO_MH:
193 offset += ipv6_optlen(exthdr); 194 offset += ipv6_optlen(exthdr);
194 if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { 195 if (!onlyproto && (nh + offset + 3 < skb->data ||
196 pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
195 struct ip6_mh *mh; 197 struct ip6_mh *mh;
196 198
197 nh = skb_network_header(skb); 199 nh = skb_network_header(skb);
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index a26c401ef4a4..43964594aa12 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
1839 for (element = hashbin_get_first(iter->hashbin); 1839 for (element = hashbin_get_first(iter->hashbin);
1840 element != NULL; 1840 element != NULL;
1841 element = hashbin_get_next(iter->hashbin)) { 1841 element = hashbin_get_next(iter->hashbin)) {
1842 if (!off || *off-- == 0) { 1842 if (!off || (*off)-- == 0) {
1843 /* NB: hashbin left locked */ 1843 /* NB: hashbin left locked */
1844 return element; 1844 return element;
1845 } 1845 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 83a70688784b..f9c9ecb0cdd3 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
261 261
262 err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); 262 err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
263 263
264 /* Error is cleare after succecful sending to at least one 264 /* Error is cleared after successful sending to at least one
265 * registered KM */ 265 * registered KM */
266 if ((broadcast_flags & BROADCAST_REGISTERED) && err) 266 if ((broadcast_flags & BROADCAST_REGISTERED) && err)
267 err = err2; 267 err = err2;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 09e661c3ae58..f39276d1c2d7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
152#endif 152#endif
153 synchronize_net(); 153 synchronize_net();
154 nf_queue_nf_hook_drop(net, &entry->ops); 154 nf_queue_nf_hook_drop(net, &entry->ops);
155 /* other cpu might still process nfqueue verdict that used reg */
156 synchronize_net();
155 kfree(entry); 157 kfree(entry);
156} 158}
157EXPORT_SYMBOL(nf_unregister_net_hook); 159EXPORT_SYMBOL(nf_unregister_net_hook);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a1fe5377a2b3..5a30ce6e8c90 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
297 ip_set_timeout_expired(ext_timeout(n, set)))) 297 ip_set_timeout_expired(ext_timeout(n, set))))
298 n = NULL; 298 n = NULL;
299 299
300 e = kzalloc(set->dsize, GFP_KERNEL); 300 e = kzalloc(set->dsize, GFP_ATOMIC);
301 if (!e) 301 if (!e)
302 return -ENOMEM; 302 return -ENOMEM;
303 e->id = d->id; 303 e->id = d->id;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0a49a8c7c564..fafe33bdb619 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2371 int pos, idx, shift; 2371 int pos, idx, shift;
2372 2372
2373 err = 0; 2373 err = 0;
2374 netlink_table_grab(); 2374 netlink_lock_table();
2375 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 2375 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
2376 if (len - pos < sizeof(u32)) 2376 if (len - pos < sizeof(u32))
2377 break; 2377 break;
@@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2386 } 2386 }
2387 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) 2387 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
2388 err = -EFAULT; 2388 err = -EFAULT;
2389 netlink_table_ungrab(); 2389 netlink_unlock_table();
2390 break; 2390 break;
2391 } 2391 }
2392 case NETLINK_CAP_ACK: 2392 case NETLINK_CAP_ACK:
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c6087233d7fc..221fa8b37a47 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -769,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
769 struct sw_flow_key *key, const struct nlattr *attr, 769 struct sw_flow_key *key, const struct nlattr *attr,
770 const struct nlattr *actions, int actions_len) 770 const struct nlattr *actions, int actions_len)
771{ 771{
772 struct ip_tunnel_info info;
773 struct dp_upcall_info upcall; 772 struct dp_upcall_info upcall;
774 const struct nlattr *a; 773 const struct nlattr *a;
775 int rem; 774 int rem;
@@ -797,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
797 if (vport) { 796 if (vport) {
798 int err; 797 int err;
799 798
800 upcall.egress_tun_info = &info; 799 err = dev_fill_metadata_dst(vport->dev, skb);
801 err = ovs_vport_get_egress_tun_info(vport, skb, 800 if (!err)
802 &upcall); 801 upcall.egress_tun_info = skb_tunnel_info(skb);
803 if (err)
804 upcall.egress_tun_info = NULL;
805 } 802 }
806 803
807 break; 804 break;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 9ed833e9bb7d..bd165ee2bb16 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
151 ct = nf_ct_get(skb, &ctinfo); 151 ct = nf_ct_get(skb, &ctinfo);
152 if (ct) { 152 if (ct) {
153 state = ovs_ct_get_state(ctinfo); 153 state = ovs_ct_get_state(ctinfo);
154 if (!nf_ct_is_confirmed(ct))
155 state |= OVS_CS_F_NEW;
154 if (ct->master) 156 if (ct->master)
155 state |= OVS_CS_F_RELATED; 157 state |= OVS_CS_F_RELATED;
156 zone = nf_ct_zone(ct); 158 zone = nf_ct_zone(ct);
@@ -222,9 +224,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
222 struct nf_conn *ct; 224 struct nf_conn *ct;
223 int err; 225 int err;
224 226
225 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
226 return -ENOTSUPP;
227
228 /* The connection could be invalid, in which case set_label is no-op.*/ 227 /* The connection could be invalid, in which case set_label is no-op.*/
229 ct = nf_ct_get(skb, &ctinfo); 228 ct = nf_ct_get(skb, &ctinfo);
230 if (!ct) 229 if (!ct)
@@ -377,7 +376,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
377 return true; 376 return true;
378} 377}
379 378
380static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, 379static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
381 const struct ovs_conntrack_info *info, 380 const struct ovs_conntrack_info *info,
382 struct sk_buff *skb) 381 struct sk_buff *skb)
383{ 382{
@@ -408,6 +407,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
408 } 407 }
409 } 408 }
410 409
410 ovs_ct_update_key(skb, key, true);
411
411 return 0; 412 return 0;
412} 413}
413 414
@@ -430,8 +431,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
430 err = __ovs_ct_lookup(net, key, info, skb); 431 err = __ovs_ct_lookup(net, key, info, skb);
431 if (err) 432 if (err)
432 return err; 433 return err;
433
434 ovs_ct_update_key(skb, key, true);
435 } 434 }
436 435
437 return 0; 436 return 0;
@@ -460,8 +459,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
460 if (nf_conntrack_confirm(skb) != NF_ACCEPT) 459 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
461 return -EINVAL; 460 return -EINVAL;
462 461
463 ovs_ct_update_key(skb, key, true);
464
465 return 0; 462 return 0;
466} 463}
467 464
@@ -587,6 +584,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
587 case OVS_CT_ATTR_MARK: { 584 case OVS_CT_ATTR_MARK: {
588 struct md_mark *mark = nla_data(a); 585 struct md_mark *mark = nla_data(a);
589 586
587 if (!mark->mask) {
588 OVS_NLERR(log, "ct_mark mask cannot be 0");
589 return -EINVAL;
590 }
590 info->mark = *mark; 591 info->mark = *mark;
591 break; 592 break;
592 } 593 }
@@ -595,6 +596,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
595 case OVS_CT_ATTR_LABELS: { 596 case OVS_CT_ATTR_LABELS: {
596 struct md_labels *labels = nla_data(a); 597 struct md_labels *labels = nla_data(a);
597 598
599 if (!labels_nonzero(&labels->mask)) {
600 OVS_NLERR(log, "ct_labels mask cannot be 0");
601 return -EINVAL;
602 }
598 info->labels = *labels; 603 info->labels = *labels;
599 break; 604 break;
600 } 605 }
@@ -705,11 +710,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
705 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 710 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
706 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 711 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
707 return -EMSGSIZE; 712 return -EMSGSIZE;
708 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 713 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
709 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 714 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
710 &ct_info->mark)) 715 &ct_info->mark))
711 return -EMSGSIZE; 716 return -EMSGSIZE;
712 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 717 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
718 labels_nonzero(&ct_info->labels.mask) &&
713 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), 719 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
714 &ct_info->labels)) 720 &ct_info->labels))
715 return -EMSGSIZE; 721 return -EMSGSIZE;
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index da8714942c95..82e0dfc66028 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -35,12 +35,9 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
35int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); 35int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
36void ovs_ct_free_action(const struct nlattr *a); 36void ovs_ct_free_action(const struct nlattr *a);
37 37
38static inline bool ovs_ct_state_supported(u32 state) 38#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
39{ 39 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
40 return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | 40 OVS_CS_F_INVALID | OVS_CS_F_TRACKED)
41 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR |
42 OVS_CS_F_INVALID | OVS_CS_F_TRACKED));
43}
44#else 41#else
45#include <linux/errno.h> 42#include <linux/errno.h>
46 43
@@ -53,11 +50,6 @@ static inline bool ovs_ct_verify(struct net *net, int attr)
53 return false; 50 return false;
54} 51}
55 52
56static inline bool ovs_ct_state_supported(u32 state)
57{
58 return false;
59}
60
61static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, 53static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
62 const struct sw_flow_key *key, 54 const struct sw_flow_key *key,
63 struct sw_flow_actions **acts, bool log) 55 struct sw_flow_actions **acts, bool log)
@@ -94,5 +86,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,
94} 86}
95 87
96static inline void ovs_ct_free_action(const struct nlattr *a) { } 88static inline void ovs_ct_free_action(const struct nlattr *a) { }
89
90#define CT_SUPPORTED_MASK 0
97#endif /* CONFIG_NF_CONNTRACK */ 91#endif /* CONFIG_NF_CONNTRACK */
98#endif /* ovs_conntrack.h */ 92#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a75828091e21..5633172b791a 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -489,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
489 489
490 if (upcall_info->egress_tun_info) { 490 if (upcall_info->egress_tun_info) {
491 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); 491 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
492 err = ovs_nla_put_egress_tunnel_key(user_skb, 492 err = ovs_nla_put_tunnel_info(user_skb,
493 upcall_info->egress_tun_info, 493 upcall_info->egress_tun_info);
494 upcall_info->egress_tun_opts);
495 BUG_ON(err); 494 BUG_ON(err);
496 nla_nest_end(user_skb, nla); 495 nla_nest_end(user_skb, nla);
497 } 496 }
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index f88038a99f44..67bdecd9fdc1 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -117,7 +117,6 @@ struct ovs_skb_cb {
117 */ 117 */
118struct dp_upcall_info { 118struct dp_upcall_info {
119 struct ip_tunnel_info *egress_tun_info; 119 struct ip_tunnel_info *egress_tun_info;
120 const void *egress_tun_opts;
121 const struct nlattr *userdata; 120 const struct nlattr *userdata;
122 const struct nlattr *actions; 121 const struct nlattr *actions;
123 int actions_len; 122 int actions_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 80e1f09397c0..907d6fd28ede 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -764,7 +764,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
764 if ((output->tun_flags & TUNNEL_OAM) && 764 if ((output->tun_flags & TUNNEL_OAM) &&
765 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 765 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
766 return -EMSGSIZE; 766 return -EMSGSIZE;
767 if (tun_opts) { 767 if (swkey_tun_opts_len) {
768 if (output->tun_flags & TUNNEL_GENEVE_OPT && 768 if (output->tun_flags & TUNNEL_GENEVE_OPT &&
769 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 769 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
770 swkey_tun_opts_len, tun_opts)) 770 swkey_tun_opts_len, tun_opts))
@@ -798,14 +798,13 @@ static int ip_tun_to_nlattr(struct sk_buff *skb,
798 return 0; 798 return 0;
799} 799}
800 800
801int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, 801int ovs_nla_put_tunnel_info(struct sk_buff *skb,
802 const struct ip_tunnel_info *egress_tun_info, 802 struct ip_tunnel_info *tun_info)
803 const void *egress_tun_opts)
804{ 803{
805 return __ip_tun_to_nlattr(skb, &egress_tun_info->key, 804 return __ip_tun_to_nlattr(skb, &tun_info->key,
806 egress_tun_opts, 805 ip_tunnel_info_opts(tun_info),
807 egress_tun_info->options_len, 806 tun_info->options_len,
808 ip_tunnel_info_af(egress_tun_info)); 807 ip_tunnel_info_af(tun_info));
809} 808}
810 809
811static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 810static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
@@ -866,7 +865,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
866 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { 865 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
867 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); 866 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
868 867
869 if (!is_mask && !ovs_ct_state_supported(ct_state)) { 868 if (ct_state & ~CT_SUPPORTED_MASK) {
870 OVS_NLERR(log, "ct_state flags %08x unsupported", 869 OVS_NLERR(log, "ct_state flags %08x unsupported",
871 ct_state); 870 ct_state);
872 return -EINVAL; 871 return -EINVAL;
@@ -1149,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val,
1149 } else { 1148 } else {
1150 memset(nla_data(nla), val, nla_len(nla)); 1149 memset(nla_data(nla), val, nla_len(nla));
1151 } 1150 }
1151
1152 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1153 *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1152 } 1154 }
1153} 1155}
1154 1156
@@ -2432,11 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
2432 if (!start) 2434 if (!start)
2433 return -EMSGSIZE; 2435 return -EMSGSIZE;
2434 2436
2435 err = ip_tun_to_nlattr(skb, &tun_info->key, 2437 err = ovs_nla_put_tunnel_info(skb, tun_info);
2436 tun_info->options_len ?
2437 ip_tunnel_info_opts(tun_info) : NULL,
2438 tun_info->options_len,
2439 ip_tunnel_info_af(tun_info));
2440 if (err) 2438 if (err)
2441 return err; 2439 return err;
2442 nla_nest_end(skb, start); 2440 nla_nest_end(skb, start);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6ca3f0baf449..47dd142eca1c 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
55int ovs_nla_get_match(struct net *, struct sw_flow_match *, 55int ovs_nla_get_match(struct net *, struct sw_flow_match *,
56 const struct nlattr *key, const struct nlattr *mask, 56 const struct nlattr *key, const struct nlattr *mask,
57 bool log); 57 bool log);
58int ovs_nla_put_egress_tunnel_key(struct sk_buff *, 58
59 const struct ip_tunnel_info *, 59int ovs_nla_put_tunnel_info(struct sk_buff *skb,
60 const void *egress_tun_opts); 60 struct ip_tunnel_info *tun_info);
61 61
62bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); 62bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
63int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, 63int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 7a568ca8da54..efb736bb6855 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,
52 return 0; 52 return 0;
53} 53}
54 54
55static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
56 struct dp_upcall_info *upcall)
57{
58 struct geneve_port *geneve_port = geneve_vport(vport);
59 struct net *net = ovs_dp_get_net(vport->dp);
60 __be16 dport = htons(geneve_port->port_no);
61 __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
62
63 return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
64 skb, IPPROTO_UDP, sport, dport);
65}
66
67static struct vport *geneve_tnl_create(const struct vport_parms *parms) 55static struct vport *geneve_tnl_create(const struct vport_parms *parms)
68{ 56{
69 struct net *net = ovs_dp_get_net(parms->dp); 57 struct net *net = ovs_dp_get_net(parms->dp);
@@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
130 .get_options = geneve_get_options, 118 .get_options = geneve_get_options,
131 .send = dev_queue_xmit, 119 .send = dev_queue_xmit,
132 .owner = THIS_MODULE, 120 .owner = THIS_MODULE,
133 .get_egress_tun_info = geneve_get_egress_tun_info,
134}; 121};
135 122
136static int __init ovs_geneve_tnl_init(void) 123static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index cdb758ab01cf..c3257d78d3d2 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)
84 return ovs_netdev_link(vport, parms->name); 84 return ovs_netdev_link(vport, parms->name);
85} 85}
86 86
87static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
88 struct dp_upcall_info *upcall)
89{
90 return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
91 skb, IPPROTO_GRE, 0, 0);
92}
93
94static struct vport_ops ovs_gre_vport_ops = { 87static struct vport_ops ovs_gre_vport_ops = {
95 .type = OVS_VPORT_TYPE_GRE, 88 .type = OVS_VPORT_TYPE_GRE,
96 .create = gre_create, 89 .create = gre_create,
97 .send = dev_queue_xmit, 90 .send = dev_queue_xmit,
98 .get_egress_tun_info = gre_get_egress_tun_info,
99 .destroy = ovs_netdev_tunnel_destroy, 91 .destroy = ovs_netdev_tunnel_destroy,
100 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
101}; 93};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 7f0a8bd08857..ec76398a792f 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev)
106 free_netdev(dev); 106 free_netdev(dev);
107} 107}
108 108
109static struct rtnl_link_stats64 *
110internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
111{
112 int i;
113
114 memset(stats, 0, sizeof(*stats));
115 stats->rx_errors = dev->stats.rx_errors;
116 stats->tx_errors = dev->stats.tx_errors;
117 stats->tx_dropped = dev->stats.tx_dropped;
118 stats->rx_dropped = dev->stats.rx_dropped;
119
120 for_each_possible_cpu(i) {
121 const struct pcpu_sw_netstats *percpu_stats;
122 struct pcpu_sw_netstats local_stats;
123 unsigned int start;
124
125 percpu_stats = per_cpu_ptr(dev->tstats, i);
126
127 do {
128 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
129 local_stats = *percpu_stats;
130 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
131
132 stats->rx_bytes += local_stats.rx_bytes;
133 stats->rx_packets += local_stats.rx_packets;
134 stats->tx_bytes += local_stats.tx_bytes;
135 stats->tx_packets += local_stats.tx_packets;
136 }
137
138 return stats;
139}
140
109static const struct net_device_ops internal_dev_netdev_ops = { 141static const struct net_device_ops internal_dev_netdev_ops = {
110 .ndo_open = internal_dev_open, 142 .ndo_open = internal_dev_open,
111 .ndo_stop = internal_dev_stop, 143 .ndo_stop = internal_dev_stop,
112 .ndo_start_xmit = internal_dev_xmit, 144 .ndo_start_xmit = internal_dev_xmit,
113 .ndo_set_mac_address = eth_mac_addr, 145 .ndo_set_mac_address = eth_mac_addr,
114 .ndo_change_mtu = internal_dev_change_mtu, 146 .ndo_change_mtu = internal_dev_change_mtu,
147 .ndo_get_stats64 = internal_get_stats,
115}; 148};
116 149
117static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { 150static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
161 err = -ENOMEM; 194 err = -ENOMEM;
162 goto error_free_vport; 195 goto error_free_vport;
163 } 196 }
197 vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
198 if (!vport->dev->tstats) {
199 err = -ENOMEM;
200 goto error_free_netdev;
201 }
164 202
165 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); 203 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
166 internal_dev = internal_dev_priv(vport->dev); 204 internal_dev = internal_dev_priv(vport->dev);
@@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
173 rtnl_lock(); 211 rtnl_lock();
174 err = register_netdevice(vport->dev); 212 err = register_netdevice(vport->dev);
175 if (err) 213 if (err)
176 goto error_free_netdev; 214 goto error_unlock;
177 215
178 dev_set_promiscuity(vport->dev, 1); 216 dev_set_promiscuity(vport->dev, 1);
179 rtnl_unlock(); 217 rtnl_unlock();
@@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
181 219
182 return vport; 220 return vport;
183 221
184error_free_netdev: 222error_unlock:
185 rtnl_unlock(); 223 rtnl_unlock();
224 free_percpu(vport->dev->tstats);
225error_free_netdev:
186 free_netdev(vport->dev); 226 free_netdev(vport->dev);
187error_free_vport: 227error_free_vport:
188 ovs_vport_free(vport); 228 ovs_vport_free(vport);
@@ -198,7 +238,7 @@ static void internal_dev_destroy(struct vport *vport)
198 238
199 /* unregister_netdevice() waits for an RCU grace period. */ 239 /* unregister_netdevice() waits for an RCU grace period. */
200 unregister_netdevice(vport->dev); 240 unregister_netdevice(vport->dev);
201 241 free_percpu(vport->dev->tstats);
202 rtnl_unlock(); 242 rtnl_unlock();
203} 243}
204 244
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 6f700710d413..1605691d9414 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -146,32 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
146 return ovs_netdev_link(vport, parms->name); 146 return ovs_netdev_link(vport, parms->name);
147} 147}
148 148
149static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
150 struct dp_upcall_info *upcall)
151{
152 struct vxlan_dev *vxlan = netdev_priv(vport->dev);
153 struct net *net = ovs_dp_get_net(vport->dp);
154 unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
155 __be16 dst_port = vxlan_dev_dst_port(vxlan, family);
156 __be16 src_port;
157 int port_min;
158 int port_max;
159
160 inet_get_local_port_range(net, &port_min, &port_max);
161 src_port = udp_flow_src_port(net, skb, 0, 0, true);
162
163 return ovs_tunnel_get_egress_info(upcall, net,
164 skb, IPPROTO_UDP,
165 src_port, dst_port);
166}
167
168static struct vport_ops ovs_vxlan_netdev_vport_ops = { 149static struct vport_ops ovs_vxlan_netdev_vport_ops = {
169 .type = OVS_VPORT_TYPE_VXLAN, 150 .type = OVS_VPORT_TYPE_VXLAN,
170 .create = vxlan_create, 151 .create = vxlan_create,
171 .destroy = ovs_netdev_tunnel_destroy, 152 .destroy = ovs_netdev_tunnel_destroy,
172 .get_options = vxlan_get_options, 153 .get_options = vxlan_get_options,
173 .send = dev_queue_xmit, 154 .send = dev_queue_xmit,
174 .get_egress_tun_info = vxlan_get_egress_tun_info,
175}; 155};
176 156
177static int __init ovs_vxlan_tnl_init(void) 157static int __init ovs_vxlan_tnl_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index ef19d0b77d13..0ac0fd004d7e 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -480,64 +480,6 @@ void ovs_vport_deferred_free(struct vport *vport)
480} 480}
481EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); 481EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
482 482
483int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
484 struct net *net,
485 struct sk_buff *skb,
486 u8 ipproto,
487 __be16 tp_src,
488 __be16 tp_dst)
489{
490 struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
491 const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
492 const struct ip_tunnel_key *tun_key;
493 u32 skb_mark = skb->mark;
494 struct rtable *rt;
495 struct flowi4 fl;
496
497 if (unlikely(!tun_info))
498 return -EINVAL;
499 if (ip_tunnel_info_af(tun_info) != AF_INET)
500 return -EINVAL;
501
502 tun_key = &tun_info->key;
503
504 /* Route lookup to get srouce IP address.
505 * The process may need to be changed if the corresponding process
506 * in vports ops changed.
507 */
508 rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
509 if (IS_ERR(rt))
510 return PTR_ERR(rt);
511
512 ip_rt_put(rt);
513
514 /* Generate egress_tun_info based on tun_info,
515 * saddr, tp_src and tp_dst
516 */
517 ip_tunnel_key_init(&egress_tun_info->key,
518 fl.saddr, tun_key->u.ipv4.dst,
519 tun_key->tos,
520 tun_key->ttl,
521 tp_src, tp_dst,
522 tun_key->tun_id,
523 tun_key->tun_flags);
524 egress_tun_info->options_len = tun_info->options_len;
525 egress_tun_info->mode = tun_info->mode;
526 upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
527 return 0;
528}
529EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
530
531int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
532 struct dp_upcall_info *upcall)
533{
534 /* get_egress_tun_info() is only implemented on tunnel ports. */
535 if (unlikely(!vport->ops->get_egress_tun_info))
536 return -EINVAL;
537
538 return vport->ops->get_egress_tun_info(vport, skb, upcall);
539}
540
541static unsigned int packet_length(const struct sk_buff *skb) 483static unsigned int packet_length(const struct sk_buff *skb)
542{ 484{
543 unsigned int length = skb->len - ETH_HLEN; 485 unsigned int length = skb->len - ETH_HLEN;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 885607f28d56..bdfd82a7c064 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -27,7 +27,6 @@
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <linux/u64_stats_sync.h> 29#include <linux/u64_stats_sync.h>
30#include <net/route.h>
31 30
32#include "datapath.h" 31#include "datapath.h"
33 32
@@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
53int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); 52int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
54u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); 53u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
55 54
56int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
57 struct net *net,
58 struct sk_buff *,
59 u8 ipproto,
60 __be16 tp_src,
61 __be16 tp_dst);
62
63int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
64 struct dp_upcall_info *upcall);
65
66/** 55/**
67 * struct vport_portids - array of netlink portids of a vport. 56 * struct vport_portids - array of netlink portids of a vport.
68 * must be protected by rcu. 57 * must be protected by rcu.
@@ -140,8 +129,6 @@ struct vport_parms {
140 * have any configuration. 129 * have any configuration.
141 * @send: Send a packet on the device. 130 * @send: Send a packet on the device.
142 * zero for dropped packets or negative for error. 131 * zero for dropped packets or negative for error.
143 * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
144 * a packet.
145 */ 132 */
146struct vport_ops { 133struct vport_ops {
147 enum ovs_vport_type type; 134 enum ovs_vport_type type;
@@ -154,9 +141,6 @@ struct vport_ops {
154 int (*get_options)(const struct vport *, struct sk_buff *); 141 int (*get_options)(const struct vport *, struct sk_buff *);
155 142
156 netdev_tx_t (*send) (struct sk_buff *skb); 143 netdev_tx_t (*send) (struct sk_buff *skb);
157 int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
158 struct dp_upcall_info *upcall);
159
160 struct module *owner; 144 struct module *owner;
161 struct list_head list; 145 struct list_head list;
162}; 146};
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e7000be321b0..ed98c1fc3de1 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -94,10 +94,14 @@ __init int net_sysctl_init(void)
94 goto out; 94 goto out;
95 ret = register_pernet_subsys(&sysctl_pernet_ops); 95 ret = register_pernet_subsys(&sysctl_pernet_ops);
96 if (ret) 96 if (ret)
97 goto out; 97 goto out1;
98 register_sysctl_root(&net_sysctl_root); 98 register_sysctl_root(&net_sysctl_root);
99out: 99out:
100 return ret; 100 return ret;
101out1:
102 unregister_sysctl_table(net_header);
103 net_header = NULL;
104 goto out;
101} 105}
102 106
103struct ctl_table_header *register_net_sysctl(struct net *net, 107struct ctl_table_header *register_net_sysctl(struct net *net,
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 41042de3ae9b..eadba62afa85 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -42,7 +42,8 @@
42#include "core.h" 42#include "core.h"
43 43
44#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ 44#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
45#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ 45#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
46#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
46 47
47const char tipc_bclink_name[] = "broadcast-link"; 48const char tipc_bclink_name[] = "broadcast-link";
48 49
@@ -908,9 +909,10 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit)
908 909
909 if (!bcl) 910 if (!bcl)
910 return -ENOPROTOOPT; 911 return -ENOPROTOOPT;
911 if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) 912 if (limit < BCLINK_WIN_MIN)
913 limit = BCLINK_WIN_MIN;
914 if (limit > TIPC_MAX_LINK_WIN)
912 return -EINVAL; 915 return -EINVAL;
913
914 tipc_bclink_lock(net); 916 tipc_bclink_lock(net);
915 tipc_link_set_queue_limits(bcl, limit); 917 tipc_link_set_queue_limits(bcl, limit);
916 tipc_bclink_unlock(net); 918 tipc_bclink_unlock(net);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 454f5ec275c8..26d38b3d8760 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
121{ 121{
122 struct sk_buff *head = *headbuf; 122 struct sk_buff *head = *headbuf;
123 struct sk_buff *frag = *buf; 123 struct sk_buff *frag = *buf;
124 struct sk_buff *tail; 124 struct sk_buff *tail = NULL;
125 struct tipc_msg *msg; 125 struct tipc_msg *msg;
126 u32 fragid; 126 u32 fragid;
127 int delta; 127 int delta;
@@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
141 if (unlikely(skb_unclone(frag, GFP_ATOMIC))) 141 if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
142 goto err; 142 goto err;
143 head = *headbuf = frag; 143 head = *headbuf = frag;
144 skb_frag_list_init(head);
145 TIPC_SKB_CB(head)->tail = NULL;
146 *buf = NULL; 144 *buf = NULL;
145 TIPC_SKB_CB(head)->tail = NULL;
146 if (skb_is_nonlinear(head)) {
147 skb_walk_frags(head, tail) {
148 TIPC_SKB_CB(head)->tail = tail;
149 }
150 } else {
151 skb_frag_list_init(head);
152 }
147 return 0; 153 return 0;
148 } 154 }
149 155
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 9bc0b1e515fa..0021c01dec17 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -52,6 +52,8 @@
52/* IANA assigned UDP port */ 52/* IANA assigned UDP port */
53#define UDP_PORT_DEFAULT 6118 53#define UDP_PORT_DEFAULT 6118
54 54
55#define UDP_MIN_HEADROOM 28
56
55static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { 57static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
56 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, 58 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
57 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, 59 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
@@ -156,6 +158,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
156 struct sk_buff *clone; 158 struct sk_buff *clone;
157 struct rtable *rt; 159 struct rtable *rt;
158 160
161 if (skb_headroom(skb) < UDP_MIN_HEADROOM)
162 pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
163
159 clone = skb_clone(skb, GFP_ATOMIC); 164 clone = skb_clone(skb, GFP_ATOMIC);
160 skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); 165 skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
161 ub = rcu_dereference_rtnl(b->media_ptr); 166 ub = rcu_dereference_rtnl(b->media_ptr);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index df5fc6b340f1..00e8a349aabc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1948,13 +1948,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
1948 err = misc_register(&vsock_device); 1948 err = misc_register(&vsock_device);
1949 if (err) { 1949 if (err) {
1950 pr_err("Failed to register misc device\n"); 1950 pr_err("Failed to register misc device\n");
1951 return -ENOENT; 1951 goto err_reset_transport;
1952 } 1952 }
1953 1953
1954 err = proto_register(&vsock_proto, 1); /* we want our slab */ 1954 err = proto_register(&vsock_proto, 1); /* we want our slab */
1955 if (err) { 1955 if (err) {
1956 pr_err("Cannot register vsock protocol\n"); 1956 pr_err("Cannot register vsock protocol\n");
1957 goto err_misc_deregister; 1957 goto err_deregister_misc;
1958 } 1958 }
1959 1959
1960 err = sock_register(&vsock_family_ops); 1960 err = sock_register(&vsock_family_ops);
@@ -1969,8 +1969,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
1969 1969
1970err_unregister_proto: 1970err_unregister_proto:
1971 proto_unregister(&vsock_proto); 1971 proto_unregister(&vsock_proto);
1972err_misc_deregister: 1972err_deregister_misc:
1973 misc_deregister(&vsock_device); 1973 misc_deregister(&vsock_device);
1974err_reset_transport:
1974 transport = NULL; 1975 transport = NULL;
1975err_busy: 1976err_busy:
1976 mutex_unlock(&vsock_register_mutex); 1977 mutex_unlock(&vsock_register_mutex);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1f63daff3965..7555cad83a75 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -40,13 +40,11 @@
40 40
41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); 41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
42static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); 42static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
43static void vmci_transport_peer_attach_cb(u32 sub_id,
44 const struct vmci_event_data *ed,
45 void *client_data);
46static void vmci_transport_peer_detach_cb(u32 sub_id, 43static void vmci_transport_peer_detach_cb(u32 sub_id,
47 const struct vmci_event_data *ed, 44 const struct vmci_event_data *ed,
48 void *client_data); 45 void *client_data);
49static void vmci_transport_recv_pkt_work(struct work_struct *work); 46static void vmci_transport_recv_pkt_work(struct work_struct *work);
47static void vmci_transport_cleanup(struct work_struct *work);
50static int vmci_transport_recv_listen(struct sock *sk, 48static int vmci_transport_recv_listen(struct sock *sk,
51 struct vmci_transport_packet *pkt); 49 struct vmci_transport_packet *pkt);
52static int vmci_transport_recv_connecting_server( 50static int vmci_transport_recv_connecting_server(
@@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info {
75 struct vmci_transport_packet pkt; 73 struct vmci_transport_packet pkt;
76}; 74};
77 75
76static LIST_HEAD(vmci_transport_cleanup_list);
77static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
78static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
79
78static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, 80static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
79 VMCI_INVALID_ID }; 81 VMCI_INVALID_ID };
80static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; 82static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
@@ -791,44 +793,6 @@ out:
791 return err; 793 return err;
792} 794}
793 795
794static void vmci_transport_peer_attach_cb(u32 sub_id,
795 const struct vmci_event_data *e_data,
796 void *client_data)
797{
798 struct sock *sk = client_data;
799 const struct vmci_event_payload_qp *e_payload;
800 struct vsock_sock *vsk;
801
802 e_payload = vmci_event_data_const_payload(e_data);
803
804 vsk = vsock_sk(sk);
805
806 /* We don't ask for delayed CBs when we subscribe to this event (we
807 * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
808 * guarantees in that case about what context we might be running in,
809 * so it could be BH or process, blockable or non-blockable. So we
810 * need to account for all possible contexts here.
811 */
812 local_bh_disable();
813 bh_lock_sock(sk);
814
815 /* XXX This is lame, we should provide a way to lookup sockets by
816 * qp_handle.
817 */
818 if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
819 e_payload->handle)) {
820 /* XXX This doesn't do anything, but in the future we may want
821 * to set a flag here to verify the attach really did occur and
822 * we weren't just sent a datagram claiming it was.
823 */
824 goto out;
825 }
826
827out:
828 bh_unlock_sock(sk);
829 local_bh_enable();
830}
831
832static void vmci_transport_handle_detach(struct sock *sk) 796static void vmci_transport_handle_detach(struct sock *sk)
833{ 797{
834 struct vsock_sock *vsk; 798 struct vsock_sock *vsk;
@@ -871,28 +835,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
871 const struct vmci_event_data *e_data, 835 const struct vmci_event_data *e_data,
872 void *client_data) 836 void *client_data)
873{ 837{
874 struct sock *sk = client_data; 838 struct vmci_transport *trans = client_data;
875 const struct vmci_event_payload_qp *e_payload; 839 const struct vmci_event_payload_qp *e_payload;
876 struct vsock_sock *vsk;
877 840
878 e_payload = vmci_event_data_const_payload(e_data); 841 e_payload = vmci_event_data_const_payload(e_data);
879 vsk = vsock_sk(sk);
880 if (vmci_handle_is_invalid(e_payload->handle))
881 return;
882
883 /* Same rules for locking as for peer_attach_cb(). */
884 local_bh_disable();
885 bh_lock_sock(sk);
886 842
887 /* XXX This is lame, we should provide a way to lookup sockets by 843 /* XXX This is lame, we should provide a way to lookup sockets by
888 * qp_handle. 844 * qp_handle.
889 */ 845 */
890 if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, 846 if (vmci_handle_is_invalid(e_payload->handle) ||
891 e_payload->handle)) 847 vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
892 vmci_transport_handle_detach(sk); 848 return;
893 849
894 bh_unlock_sock(sk); 850 /* We don't ask for delayed CBs when we subscribe to this event (we
895 local_bh_enable(); 851 * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
852 * guarantees in that case about what context we might be running in,
853 * so it could be BH or process, blockable or non-blockable. So we
854 * need to account for all possible contexts here.
855 */
856 spin_lock_bh(&trans->lock);
857 if (!trans->sk)
858 goto out;
859
860 /* Apart from here, trans->lock is only grabbed as part of sk destruct,
861 * where trans->sk isn't locked.
862 */
863 bh_lock_sock(trans->sk);
864
865 vmci_transport_handle_detach(trans->sk);
866
867 bh_unlock_sock(trans->sk);
868 out:
869 spin_unlock_bh(&trans->lock);
896} 870}
897 871
898static void vmci_transport_qp_resumed_cb(u32 sub_id, 872static void vmci_transport_qp_resumed_cb(u32 sub_id,
@@ -1181,7 +1155,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
1181 */ 1155 */
1182 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, 1156 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1183 vmci_transport_peer_detach_cb, 1157 vmci_transport_peer_detach_cb,
1184 pending, &detach_sub_id); 1158 vmci_trans(vpending), &detach_sub_id);
1185 if (err < VMCI_SUCCESS) { 1159 if (err < VMCI_SUCCESS) {
1186 vmci_transport_send_reset(pending, pkt); 1160 vmci_transport_send_reset(pending, pkt);
1187 err = vmci_transport_error_to_vsock_error(err); 1161 err = vmci_transport_error_to_vsock_error(err);
@@ -1321,7 +1295,6 @@ vmci_transport_recv_connecting_client(struct sock *sk,
1321 || vmci_trans(vsk)->qpair 1295 || vmci_trans(vsk)->qpair
1322 || vmci_trans(vsk)->produce_size != 0 1296 || vmci_trans(vsk)->produce_size != 0
1323 || vmci_trans(vsk)->consume_size != 0 1297 || vmci_trans(vsk)->consume_size != 0
1324 || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
1325 || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { 1298 || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1326 skerr = EPROTO; 1299 skerr = EPROTO;
1327 err = -EINVAL; 1300 err = -EINVAL;
@@ -1389,7 +1362,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1389 struct vsock_sock *vsk; 1362 struct vsock_sock *vsk;
1390 struct vmci_handle handle; 1363 struct vmci_handle handle;
1391 struct vmci_qp *qpair; 1364 struct vmci_qp *qpair;
1392 u32 attach_sub_id;
1393 u32 detach_sub_id; 1365 u32 detach_sub_id;
1394 bool is_local; 1366 bool is_local;
1395 u32 flags; 1367 u32 flags;
@@ -1399,7 +1371,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1399 1371
1400 vsk = vsock_sk(sk); 1372 vsk = vsock_sk(sk);
1401 handle = VMCI_INVALID_HANDLE; 1373 handle = VMCI_INVALID_HANDLE;
1402 attach_sub_id = VMCI_INVALID_ID;
1403 detach_sub_id = VMCI_INVALID_ID; 1374 detach_sub_id = VMCI_INVALID_ID;
1404 1375
1405 /* If we have gotten here then we should be past the point where old 1376 /* If we have gotten here then we should be past the point where old
@@ -1444,23 +1415,15 @@ static int vmci_transport_recv_connecting_client_negotiate(
1444 goto destroy; 1415 goto destroy;
1445 } 1416 }
1446 1417
1447 /* Subscribe to attach and detach events first. 1418 /* Subscribe to detach events first.
1448 * 1419 *
1449 * XXX We attach once for each queue pair created for now so it is easy 1420 * XXX We attach once for each queue pair created for now so it is easy
1450 * to find the socket (it's provided), but later we should only 1421 * to find the socket (it's provided), but later we should only
1451 * subscribe once and add a way to lookup sockets by queue pair handle. 1422 * subscribe once and add a way to lookup sockets by queue pair handle.
1452 */ 1423 */
1453 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
1454 vmci_transport_peer_attach_cb,
1455 sk, &attach_sub_id);
1456 if (err < VMCI_SUCCESS) {
1457 err = vmci_transport_error_to_vsock_error(err);
1458 goto destroy;
1459 }
1460
1461 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, 1424 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1462 vmci_transport_peer_detach_cb, 1425 vmci_transport_peer_detach_cb,
1463 sk, &detach_sub_id); 1426 vmci_trans(vsk), &detach_sub_id);
1464 if (err < VMCI_SUCCESS) { 1427 if (err < VMCI_SUCCESS) {
1465 err = vmci_transport_error_to_vsock_error(err); 1428 err = vmci_transport_error_to_vsock_error(err);
1466 goto destroy; 1429 goto destroy;
@@ -1496,7 +1459,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1496 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 1459 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
1497 pkt->u.size; 1460 pkt->u.size;
1498 1461
1499 vmci_trans(vsk)->attach_sub_id = attach_sub_id;
1500 vmci_trans(vsk)->detach_sub_id = detach_sub_id; 1462 vmci_trans(vsk)->detach_sub_id = detach_sub_id;
1501 1463
1502 vmci_trans(vsk)->notify_ops->process_negotiate(sk); 1464 vmci_trans(vsk)->notify_ops->process_negotiate(sk);
@@ -1504,9 +1466,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1504 return 0; 1466 return 0;
1505 1467
1506destroy: 1468destroy:
1507 if (attach_sub_id != VMCI_INVALID_ID)
1508 vmci_event_unsubscribe(attach_sub_id);
1509
1510 if (detach_sub_id != VMCI_INVALID_ID) 1469 if (detach_sub_id != VMCI_INVALID_ID)
1511 vmci_event_unsubscribe(detach_sub_id); 1470 vmci_event_unsubscribe(detach_sub_id);
1512 1471
@@ -1607,9 +1566,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
1607 vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; 1566 vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1608 vmci_trans(vsk)->qpair = NULL; 1567 vmci_trans(vsk)->qpair = NULL;
1609 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; 1568 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
1610 vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = 1569 vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
1611 VMCI_INVALID_ID;
1612 vmci_trans(vsk)->notify_ops = NULL; 1570 vmci_trans(vsk)->notify_ops = NULL;
1571 INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
1572 vmci_trans(vsk)->sk = &vsk->sk;
1573 spin_lock_init(&vmci_trans(vsk)->lock);
1613 if (psk) { 1574 if (psk) {
1614 vmci_trans(vsk)->queue_pair_size = 1575 vmci_trans(vsk)->queue_pair_size =
1615 vmci_trans(psk)->queue_pair_size; 1576 vmci_trans(psk)->queue_pair_size;
@@ -1629,29 +1590,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
1629 return 0; 1590 return 0;
1630} 1591}
1631 1592
1632static void vmci_transport_destruct(struct vsock_sock *vsk) 1593static void vmci_transport_free_resources(struct list_head *transport_list)
1633{ 1594{
1634 if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { 1595 while (!list_empty(transport_list)) {
1635 vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); 1596 struct vmci_transport *transport =
1636 vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; 1597 list_first_entry(transport_list, struct vmci_transport,
1637 } 1598 elem);
1599 list_del(&transport->elem);
1638 1600
1639 if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { 1601 if (transport->detach_sub_id != VMCI_INVALID_ID) {
1640 vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); 1602 vmci_event_unsubscribe(transport->detach_sub_id);
1641 vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; 1603 transport->detach_sub_id = VMCI_INVALID_ID;
1642 } 1604 }
1643 1605
1644 if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { 1606 if (!vmci_handle_is_invalid(transport->qp_handle)) {
1645 vmci_qpair_detach(&vmci_trans(vsk)->qpair); 1607 vmci_qpair_detach(&transport->qpair);
1646 vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; 1608 transport->qp_handle = VMCI_INVALID_HANDLE;
1647 vmci_trans(vsk)->produce_size = 0; 1609 transport->produce_size = 0;
1648 vmci_trans(vsk)->consume_size = 0; 1610 transport->consume_size = 0;
1611 }
1612
1613 kfree(transport);
1649 } 1614 }
1615}
1616
1617static void vmci_transport_cleanup(struct work_struct *work)
1618{
1619 LIST_HEAD(pending);
1620
1621 spin_lock_bh(&vmci_transport_cleanup_lock);
1622 list_replace_init(&vmci_transport_cleanup_list, &pending);
1623 spin_unlock_bh(&vmci_transport_cleanup_lock);
1624 vmci_transport_free_resources(&pending);
1625}
1626
1627static void vmci_transport_destruct(struct vsock_sock *vsk)
1628{
1629 /* Ensure that the detach callback doesn't use the sk/vsk
1630 * we are about to destruct.
1631 */
1632 spin_lock_bh(&vmci_trans(vsk)->lock);
1633 vmci_trans(vsk)->sk = NULL;
1634 spin_unlock_bh(&vmci_trans(vsk)->lock);
1650 1635
1651 if (vmci_trans(vsk)->notify_ops) 1636 if (vmci_trans(vsk)->notify_ops)
1652 vmci_trans(vsk)->notify_ops->socket_destruct(vsk); 1637 vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
1653 1638
1654 kfree(vsk->trans); 1639 spin_lock_bh(&vmci_transport_cleanup_lock);
1640 list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
1641 spin_unlock_bh(&vmci_transport_cleanup_lock);
1642 schedule_work(&vmci_transport_cleanup_work);
1643
1655 vsk->trans = NULL; 1644 vsk->trans = NULL;
1656} 1645}
1657 1646
@@ -2146,6 +2135,9 @@ module_init(vmci_transport_init);
2146 2135
2147static void __exit vmci_transport_exit(void) 2136static void __exit vmci_transport_exit(void)
2148{ 2137{
2138 cancel_work_sync(&vmci_transport_cleanup_work);
2139 vmci_transport_free_resources(&vmci_transport_cleanup_list);
2140
2149 if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { 2141 if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
2150 if (vmci_datagram_destroy_handle( 2142 if (vmci_datagram_destroy_handle(
2151 vmci_transport_stream_handle) != VMCI_SUCCESS) 2143 vmci_transport_stream_handle) != VMCI_SUCCESS)
@@ -2164,6 +2156,7 @@ module_exit(vmci_transport_exit);
2164 2156
2165MODULE_AUTHOR("VMware, Inc."); 2157MODULE_AUTHOR("VMware, Inc.");
2166MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); 2158MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2159MODULE_VERSION("1.0.2.0-k");
2167MODULE_LICENSE("GPL v2"); 2160MODULE_LICENSE("GPL v2");
2168MODULE_ALIAS("vmware_vsock"); 2161MODULE_ALIAS("vmware_vsock");
2169MODULE_ALIAS_NETPROTO(PF_VSOCK); 2162MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index ce6c9623d5f0..2ad46f39649f 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -119,10 +119,12 @@ struct vmci_transport {
119 u64 queue_pair_size; 119 u64 queue_pair_size;
120 u64 queue_pair_min_size; 120 u64 queue_pair_min_size;
121 u64 queue_pair_max_size; 121 u64 queue_pair_max_size;
122 u32 attach_sub_id;
123 u32 detach_sub_id; 122 u32 detach_sub_id;
124 union vmci_transport_notify notify; 123 union vmci_transport_notify notify;
125 struct vmci_transport_notify_ops *notify_ops; 124 struct vmci_transport_notify_ops *notify_ops;
125 struct list_head elem;
126 struct sock *sk;
127 spinlock_t lock; /* protects sk. */
126}; 128};
127 129
128int vmci_transport_register(void); 130int vmci_transport_register(void);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a8de9e300200..24e06a2377f6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1928,8 +1928,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
1928 struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; 1928 struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
1929 struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; 1929 struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
1930 struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; 1930 struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
1931 struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
1932 struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
1931 1933
1932 if (!lt && !rp && !re) 1934 if (!lt && !rp && !re && !et && !rt)
1933 return err; 1935 return err;
1934 1936
1935 /* pedantic mode - thou shalt sayeth replaceth */ 1937 /* pedantic mode - thou shalt sayeth replaceth */