about summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-04-15 23:30:30 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-15 23:30:30 -0400
commit 10ec34fcb100412ab186c141a9c3557d1270effd (patch)
tree 7bf90280ecac27d6ffda091a171f41ea6e72ba78 /net
parent 55101e2d6ce1c780f6ee8fee5f37306971aac6cd (diff)
parent 6f1d7210376727d090e04b8635e6dda4d7eb7b0c (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Fix BPF filter validation of netlink attribute accesses, from Mathias Krause. 2) Netfilter conntrack generation seqcount not initialized properly, from Andrey Vagin. 3) Fix comparison mask computation on big-endian in nft_cmp_fast(), from Patrick McHardy. 4) Properly limit MTU over ipv6, from Eric Dumazet. 5) Fix seccomp system call argument population on 32-bit, from Daniel Borkmann. 6) skb_network_protocol() should not use hard-coded ETH_HLEN, instead skb->mac_len needs to be used. From Vlad Yasevich. 7) We have several cases of using socket based communications to implement a tunnel. For example, some tunnels are encapsulations over UDP so we use an internal kernel UDP socket to do the transmits. These tunnels should behave just like other software devices and pass the packets on down to the next layer. Most importantly we want the top-level socket (eg TCP) that created the traffic to be charged for the SKB memory. However, once you get into the IP output path, we have code that assumed that whatever was attached to skb->sk is an IP socket. To keep the top-level socket being charged for the SKB memory, whilst satisfying the needs of the IP output path, we now pass in an explicit 'sk' argument. From Eric Dumazet. 8) ping_init_sock() leaks group info, from Xiaoming Wang. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (33 commits) cxgb4: use the correct max size for firmware flash qlcnic: Fix MSI-X initialization code ip6_gre: don't allow to remove the fb_tunnel_dev ipv4: add a sock pointer to dst->output() path. 
ipv4: add a sock pointer to ip_queue_xmit() driver/net: cosa driver uses udelay incorrectly at86rf230: fix __at86rf230_read_subreg function at86rf230: remove check if AVDD settled net: cadence: Add architecture dependencies net: Start with correct mac_len in skb_network_protocol Revert "net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer" cxgb4: Save the correct mac addr for hw-loopback connections in the L2T net: filter: seccomp: fix wrong decoding of BPF_S_ANC_SECCOMP_LD_W seccomp: fix populating a0-a5 syscall args in 32-bit x86 BPF qlcnic: Do not disable SR-IOV when VFs are assigned to VMs qlcnic: Fix QLogic application/driver interface for virtual NIC configuration qlcnic: Fix PVID configuration on eSwitch port. qlcnic: Fix max ring count calculation qlcnic: Fix to send INIT_NIC_FUNC as first mailbox. qlcnic: Fix panic due to uninitialzed delayed_work struct in use. ...
Diffstat (limited to 'net')
-rw-r--r-- net/core/dev.c 2
-rw-r--r-- net/core/dst.c 15
-rw-r--r-- net/core/filter.c 9
-rw-r--r-- net/dccp/output.c 2
-rw-r--r-- net/decnet/dn_route.c 16
-rw-r--r-- net/ipv4/ip_output.c 16
-rw-r--r-- net/ipv4/ip_tunnel.c 2
-rw-r--r-- net/ipv4/ip_tunnel_core.c 4
-rw-r--r-- net/ipv4/ping.c 15
-rw-r--r-- net/ipv4/route.c 6
-rw-r--r-- net/ipv4/tcp_output.c 2
-rw-r--r-- net/ipv4/xfrm4_output.c 2
-rw-r--r-- net/ipv6/inet6_connection_sock.c 3
-rw-r--r-- net/ipv6/ip6_gre.c 10
-rw-r--r-- net/ipv6/ip6_output.c 2
-rw-r--r-- net/ipv6/route.c 19
-rw-r--r-- net/ipv6/sit.c 5
-rw-r--r-- net/ipv6/xfrm6_output.c 2
-rw-r--r-- net/l2tp/l2tp_core.c 4
-rw-r--r-- net/l2tp/l2tp_ip.c 2
-rw-r--r-- net/netfilter/nf_conntrack_core.c 1
-rw-r--r-- net/netfilter/nf_conntrack_pptp.c 20
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c 3
-rw-r--r-- net/netfilter/nf_tables_core.c 3
-rw-r--r-- net/netfilter/nft_cmp.c 2
-rw-r--r-- net/openvswitch/vport-gre.c 2
-rw-r--r-- net/sctp/associola.c 82
-rw-r--r-- net/sctp/protocol.c 2
-rw-r--r-- net/sctp/sm_statefuns.c 2
-rw-r--r-- net/sctp/socket.c 6
-rw-r--r-- net/sctp/ulpevent.c 8
-rw-r--r-- net/xfrm/xfrm_policy.c 2
32 files changed, 170 insertions, 101 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 14dac0654f28..5b3042e69f85 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2284,7 +2284,7 @@ EXPORT_SYMBOL(skb_checksum_help);
2284__be16 skb_network_protocol(struct sk_buff *skb, int *depth) 2284__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2285{ 2285{
2286 __be16 type = skb->protocol; 2286 __be16 type = skb->protocol;
2287 int vlan_depth = ETH_HLEN; 2287 int vlan_depth = skb->mac_len;
2288 2288
2289 /* Tunnel gso handlers can set protocol to ethernet. */ 2289 /* Tunnel gso handlers can set protocol to ethernet. */
2290 if (type == htons(ETH_P_TEB)) { 2290 if (type == htons(ETH_P_TEB)) {
diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231ec7347..80d6286c8b62 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -142,12 +142,12 @@ loop:
142 mutex_unlock(&dst_gc_mutex); 142 mutex_unlock(&dst_gc_mutex);
143} 143}
144 144
145int dst_discard(struct sk_buff *skb) 145int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
146{ 146{
147 kfree_skb(skb); 147 kfree_skb(skb);
148 return 0; 148 return 0;
149} 149}
150EXPORT_SYMBOL(dst_discard); 150EXPORT_SYMBOL(dst_discard_sk);
151 151
152const u32 dst_default_metrics[RTAX_MAX + 1] = { 152const u32 dst_default_metrics[RTAX_MAX + 1] = {
153 /* This initializer is needed to force linker to place this variable 153 /* This initializer is needed to force linker to place this variable
@@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
184 dst->xfrm = NULL; 184 dst->xfrm = NULL;
185#endif 185#endif
186 dst->input = dst_discard; 186 dst->input = dst_discard;
187 dst->output = dst_discard; 187 dst->output = dst_discard_sk;
188 dst->error = 0; 188 dst->error = 0;
189 dst->obsolete = initial_obsolete; 189 dst->obsolete = initial_obsolete;
190 dst->header_len = 0; 190 dst->header_len = 0;
@@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst)
209 /* The first case (dev==NULL) is required, when 209 /* The first case (dev==NULL) is required, when
210 protocol module is unloaded. 210 protocol module is unloaded.
211 */ 211 */
212 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) 212 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
213 dst->input = dst->output = dst_discard; 213 dst->input = dst_discard;
214 dst->output = dst_discard_sk;
215 }
214 dst->obsolete = DST_OBSOLETE_DEAD; 216 dst->obsolete = DST_OBSOLETE_DEAD;
215} 217}
216 218
@@ -361,7 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
361 return; 363 return;
362 364
363 if (!unregister) { 365 if (!unregister) {
364 dst->input = dst->output = dst_discard; 366 dst->input = dst_discard;
367 dst->output = dst_discard_sk;
365 } else { 368 } else {
366 dst->dev = dev_net(dst->dev)->loopback_dev; 369 dst->dev = dev_net(dst->dev)->loopback_dev;
367 dev_hold(dst->dev); 370 dev_hold(dst->dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index e08b3822c72a..cd58614660cf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -600,6 +600,9 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
600 if (skb_is_nonlinear(skb)) 600 if (skb_is_nonlinear(skb))
601 return 0; 601 return 0;
602 602
603 if (skb->len < sizeof(struct nlattr))
604 return 0;
605
603 if (A > skb->len - sizeof(struct nlattr)) 606 if (A > skb->len - sizeof(struct nlattr))
604 return 0; 607 return 0;
605 608
@@ -618,11 +621,14 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
618 if (skb_is_nonlinear(skb)) 621 if (skb_is_nonlinear(skb))
619 return 0; 622 return 0;
620 623
624 if (skb->len < sizeof(struct nlattr))
625 return 0;
626
621 if (A > skb->len - sizeof(struct nlattr)) 627 if (A > skb->len - sizeof(struct nlattr))
622 return 0; 628 return 0;
623 629
624 nla = (struct nlattr *) &skb->data[A]; 630 nla = (struct nlattr *) &skb->data[A];
625 if (nla->nla_len > A - skb->len) 631 if (nla->nla_len > skb->len - A)
626 return 0; 632 return 0;
627 633
628 nla = nla_find_nested(nla, X); 634 nla = nla_find_nested(nla, X);
@@ -1737,7 +1743,6 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
1737 [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, 1743 [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
1738 [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, 1744 [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
1739 [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, 1745 [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
1740 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
1741 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, 1746 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
1742 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, 1747 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
1743 [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, 1748 [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 8876078859da..0248e8a3460c 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -138,7 +138,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
138 138
139 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 139 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
140 140
141 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); 141 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
142 return net_xmit_eval(err); 142 return net_xmit_eval(err);
143 } 143 }
144 return -ENOBUFS; 144 return -ENOBUFS;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ce0cbbfe0f43..daccc4a36d80 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb)
752 return n->output(n, skb); 752 return n->output(n, skb);
753} 753}
754 754
755static int dn_output(struct sk_buff *skb) 755static int dn_output(struct sock *sk, struct sk_buff *skb)
756{ 756{
757 struct dst_entry *dst = skb_dst(skb); 757 struct dst_entry *dst = skb_dst(skb);
758 struct dn_route *rt = (struct dn_route *)dst; 758 struct dn_route *rt = (struct dn_route *)dst;
@@ -838,6 +838,18 @@ drop:
838 * Used to catch bugs. This should never normally get 838 * Used to catch bugs. This should never normally get
839 * called. 839 * called.
840 */ 840 */
841static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
842{
843 struct dn_skb_cb *cb = DN_SKB_CB(skb);
844
845 net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
846 le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
847
848 kfree_skb(skb);
849
850 return NET_RX_DROP;
851}
852
841static int dn_rt_bug(struct sk_buff *skb) 853static int dn_rt_bug(struct sk_buff *skb)
842{ 854{
843 struct dn_skb_cb *cb = DN_SKB_CB(skb); 855 struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -1463,7 +1475,7 @@ make_route:
1463 1475
1464 rt->n = neigh; 1476 rt->n = neigh;
1465 rt->dst.lastuse = jiffies; 1477 rt->dst.lastuse = jiffies;
1466 rt->dst.output = dn_rt_bug; 1478 rt->dst.output = dn_rt_bug_sk;
1467 switch (res.type) { 1479 switch (res.type) {
1468 case RTN_UNICAST: 1480 case RTN_UNICAST:
1469 rt->dst.input = dn_forward; 1481 rt->dst.input = dn_forward;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1a0755fea491..1cbeba5edff9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb)
101 skb_dst(skb)->dev, dst_output); 101 skb_dst(skb)->dev, dst_output);
102} 102}
103 103
104int ip_local_out(struct sk_buff *skb) 104int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
105{ 105{
106 int err; 106 int err;
107 107
108 err = __ip_local_out(skb); 108 err = __ip_local_out(skb);
109 if (likely(err == 1)) 109 if (likely(err == 1))
110 err = dst_output(skb); 110 err = dst_output_sk(sk, skb);
111 111
112 return err; 112 return err;
113} 113}
114EXPORT_SYMBOL_GPL(ip_local_out); 114EXPORT_SYMBOL_GPL(ip_local_out_sk);
115 115
116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) 116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
117{ 117{
@@ -226,9 +226,8 @@ static int ip_finish_output(struct sk_buff *skb)
226 return ip_finish_output2(skb); 226 return ip_finish_output2(skb);
227} 227}
228 228
229int ip_mc_output(struct sk_buff *skb) 229int ip_mc_output(struct sock *sk, struct sk_buff *skb)
230{ 230{
231 struct sock *sk = skb->sk;
232 struct rtable *rt = skb_rtable(skb); 231 struct rtable *rt = skb_rtable(skb);
233 struct net_device *dev = rt->dst.dev; 232 struct net_device *dev = rt->dst.dev;
234 233
@@ -287,7 +286,7 @@ int ip_mc_output(struct sk_buff *skb)
287 !(IPCB(skb)->flags & IPSKB_REROUTED)); 286 !(IPCB(skb)->flags & IPSKB_REROUTED));
288} 287}
289 288
290int ip_output(struct sk_buff *skb) 289int ip_output(struct sock *sk, struct sk_buff *skb)
291{ 290{
292 struct net_device *dev = skb_dst(skb)->dev; 291 struct net_device *dev = skb_dst(skb)->dev;
293 292
@@ -315,9 +314,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
315 sizeof(fl4->saddr) + sizeof(fl4->daddr)); 314 sizeof(fl4->saddr) + sizeof(fl4->daddr));
316} 315}
317 316
318int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) 317/* Note: skb->sk can be different from sk, in case of tunnels */
318int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
319{ 319{
320 struct sock *sk = skb->sk;
321 struct inet_sock *inet = inet_sk(sk); 320 struct inet_sock *inet = inet_sk(sk);
322 struct ip_options_rcu *inet_opt; 321 struct ip_options_rcu *inet_opt;
323 struct flowi4 *fl4; 322 struct flowi4 *fl4;
@@ -389,6 +388,7 @@ packet_routed:
389 ip_select_ident_more(skb, &rt->dst, sk, 388 ip_select_ident_more(skb, &rt->dst, sk,
390 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 389 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
391 390
391 /* TODO : should we use skb->sk here instead of sk ? */
392 skb->priority = sk->sk_priority; 392 skb->priority = sk->sk_priority;
393 skb->mark = sk->sk_mark; 393 skb->mark = sk->sk_mark;
394 394
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e77381d1df9a..484d0ce27ef7 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
670 return; 670 return;
671 } 671 }
672 672
673 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, 673 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
674 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); 674 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
675 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 675 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
676 676
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index e0c2b1d2ea4e..bcf206c79005 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,7 +46,7 @@
46#include <net/netns/generic.h> 46#include <net/netns/generic.h>
47#include <net/rtnetlink.h> 47#include <net/rtnetlink.h>
48 48
49int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, 49int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
50 __be32 src, __be32 dst, __u8 proto, 50 __be32 src, __be32 dst, __u8 proto,
51 __u8 tos, __u8 ttl, __be16 df, bool xnet) 51 __u8 tos, __u8 ttl, __be16 df, bool xnet)
52{ 52{
@@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
76 iph->ttl = ttl; 76 iph->ttl = ttl;
77 __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); 77 __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
78 78
79 err = ip_local_out(skb); 79 err = ip_local_out_sk(sk, skb);
80 if (unlikely(net_xmit_eval(err))) 80 if (unlikely(net_xmit_eval(err)))
81 pkt_len = 0; 81 pkt_len = 0;
82 return pkt_len; 82 return pkt_len;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index f4b19e5dde54..8210964a9f19 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk)
252{ 252{
253 struct net *net = sock_net(sk); 253 struct net *net = sock_net(sk);
254 kgid_t group = current_egid(); 254 kgid_t group = current_egid();
255 struct group_info *group_info = get_current_groups(); 255 struct group_info *group_info;
256 int i, j, count = group_info->ngroups; 256 int i, j, count;
257 kgid_t low, high; 257 kgid_t low, high;
258 int ret = 0;
258 259
259 inet_get_ping_group_range_net(net, &low, &high); 260 inet_get_ping_group_range_net(net, &low, &high);
260 if (gid_lte(low, group) && gid_lte(group, high)) 261 if (gid_lte(low, group) && gid_lte(group, high))
261 return 0; 262 return 0;
262 263
264 group_info = get_current_groups();
265 count = group_info->ngroups;
263 for (i = 0; i < group_info->nblocks; i++) { 266 for (i = 0; i < group_info->nblocks; i++) {
264 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); 267 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
265 for (j = 0; j < cp_count; j++) { 268 for (j = 0; j < cp_count; j++) {
266 kgid_t gid = group_info->blocks[i][j]; 269 kgid_t gid = group_info->blocks[i][j];
267 if (gid_lte(low, gid) && gid_lte(gid, high)) 270 if (gid_lte(low, gid) && gid_lte(gid, high))
268 return 0; 271 goto out_release_group;
269 } 272 }
270 273
271 count -= cp_count; 274 count -= cp_count;
272 } 275 }
273 276
274 return -EACCES; 277 ret = -EACCES;
278
279out_release_group:
280 put_group_info(group_info);
281 return ret;
275} 282}
276EXPORT_SYMBOL_GPL(ping_init_sock); 283EXPORT_SYMBOL_GPL(ping_init_sock);
277 284
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 34d094cadb11..1485aafcad59 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1129,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1129 dst_set_expires(&rt->dst, 0); 1129 dst_set_expires(&rt->dst, 0);
1130} 1130}
1131 1131
1132static int ip_rt_bug(struct sk_buff *skb) 1132static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
1133{ 1133{
1134 pr_debug("%s: %pI4 -> %pI4, %s\n", 1134 pr_debug("%s: %pI4 -> %pI4, %s\n",
1135 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 1135 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -2218,7 +2218,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2218 2218
2219 new->__use = 1; 2219 new->__use = 1;
2220 new->input = dst_discard; 2220 new->input = dst_discard;
2221 new->output = dst_discard; 2221 new->output = dst_discard_sk;
2222 2222
2223 new->dev = ort->dst.dev; 2223 new->dev = ort->dst.dev;
2224 if (new->dev) 2224 if (new->dev)
@@ -2357,7 +2357,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2357 } 2357 }
2358 } else 2358 } else
2359#endif 2359#endif
2360 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) 2360 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
2361 goto nla_put_failure; 2361 goto nla_put_failure;
2362 } 2362 }
2363 2363
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 699fb102e971..025e25093984 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -981,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
981 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 981 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
982 tcp_skb_pcount(skb)); 982 tcp_skb_pcount(skb));
983 983
984 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); 984 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
985 if (likely(err <= 0)) 985 if (likely(err <= 0))
986 return err; 986 return err;
987 987
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index baa0f63731fd..40e701f2e1e0 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -86,7 +86,7 @@ int xfrm4_output_finish(struct sk_buff *skb)
86 return xfrm_output(skb); 86 return xfrm_output(skb);
87} 87}
88 88
89int xfrm4_output(struct sk_buff *skb) 89int xfrm4_output(struct sock *sk, struct sk_buff *skb)
90{ 90{
91 struct dst_entry *dst = skb_dst(skb); 91 struct dst_entry *dst = skb_dst(skb);
92 struct xfrm_state *x = dst->xfrm; 92 struct xfrm_state *x = dst->xfrm;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c9138189415a..d4ade34ab375 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
224 return dst; 224 return dst;
225} 225}
226 226
227int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) 227int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
228{ 228{
229 struct sock *sk = skb->sk;
230 struct ipv6_pinfo *np = inet6_sk(sk); 229 struct ipv6_pinfo *np = inet6_sk(sk);
231 struct flowi6 fl6; 230 struct flowi6 fl6;
232 struct dst_entry *dst; 231 struct dst_entry *dst;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c98338b81d30..9d921462b57f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1559,6 +1559,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
1559 return 0; 1559 return 0;
1560} 1560}
1561 1561
1562static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
1563{
1564 struct net *net = dev_net(dev);
1565 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1566
1567 if (dev != ign->fb_tunnel_dev)
1568 unregister_netdevice_queue(dev, head);
1569}
1570
1562static size_t ip6gre_get_size(const struct net_device *dev) 1571static size_t ip6gre_get_size(const struct net_device *dev)
1563{ 1572{
1564 return 1573 return
@@ -1636,6 +1645,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
1636 .validate = ip6gre_tunnel_validate, 1645 .validate = ip6gre_tunnel_validate,
1637 .newlink = ip6gre_newlink, 1646 .newlink = ip6gre_newlink,
1638 .changelink = ip6gre_changelink, 1647 .changelink = ip6gre_changelink,
1648 .dellink = ip6gre_dellink,
1639 .get_size = ip6gre_get_size, 1649 .get_size = ip6gre_get_size,
1640 .fill_info = ip6gre_fill_info, 1650 .fill_info = ip6gre_fill_info,
1641}; 1651};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3284d61577c0..40e7581374f7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb)
132 return ip6_finish_output2(skb); 132 return ip6_finish_output2(skb);
133} 133}
134 134
135int ip6_output(struct sk_buff *skb) 135int ip6_output(struct sock *sk, struct sk_buff *skb)
136{ 136{
137 struct net_device *dev = skb_dst(skb)->dev; 137 struct net_device *dev = skb_dst(skb)->dev;
138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5015c50a5ba7..4011617cca68 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
84static int ip6_dst_gc(struct dst_ops *ops); 84static int ip6_dst_gc(struct dst_ops *ops);
85 85
86static int ip6_pkt_discard(struct sk_buff *skb); 86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb); 87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88static int ip6_pkt_prohibit(struct sk_buff *skb); 88static int ip6_pkt_prohibit(struct sk_buff *skb);
89static int ip6_pkt_prohibit_out(struct sk_buff *skb); 89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90static void ip6_link_failure(struct sk_buff *skb); 90static void ip6_link_failure(struct sk_buff *skb);
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu); 92 struct sk_buff *skb, u32 mtu);
@@ -290,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
290 .obsolete = DST_OBSOLETE_FORCE_CHK, 290 .obsolete = DST_OBSOLETE_FORCE_CHK,
291 .error = -EINVAL, 291 .error = -EINVAL,
292 .input = dst_discard, 292 .input = dst_discard,
293 .output = dst_discard, 293 .output = dst_discard_sk,
294 }, 294 },
295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
296 .rt6i_protocol = RTPROT_KERNEL, 296 .rt6i_protocol = RTPROT_KERNEL,
@@ -1058,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1058 1058
1059 new->__use = 1; 1059 new->__use = 1;
1060 new->input = dst_discard; 1060 new->input = dst_discard;
1061 new->output = dst_discard; 1061 new->output = dst_discard_sk;
1062 1062
1063 if (dst_metrics_read_only(&ort->dst)) 1063 if (dst_metrics_read_only(&ort->dst))
1064 new->_metrics = ort->dst._metrics; 1064 new->_metrics = ort->dst._metrics;
@@ -1338,7 +1338,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
1338 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 1338 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1339 1339
1340 if (mtu) 1340 if (mtu)
1341 return mtu; 1341 goto out;
1342 1342
1343 mtu = IPV6_MIN_MTU; 1343 mtu = IPV6_MIN_MTU;
1344 1344
@@ -1348,7 +1348,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
1348 mtu = idev->cnf.mtu6; 1348 mtu = idev->cnf.mtu6;
1349 rcu_read_unlock(); 1349 rcu_read_unlock();
1350 1350
1351 return mtu; 1351out:
1352 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1352} 1353}
1353 1354
1354static struct dst_entry *icmp6_dst_gc_list; 1355static struct dst_entry *icmp6_dst_gc_list;
@@ -1576,7 +1577,7 @@ int ip6_route_add(struct fib6_config *cfg)
1576 switch (cfg->fc_type) { 1577 switch (cfg->fc_type) {
1577 case RTN_BLACKHOLE: 1578 case RTN_BLACKHOLE:
1578 rt->dst.error = -EINVAL; 1579 rt->dst.error = -EINVAL;
1579 rt->dst.output = dst_discard; 1580 rt->dst.output = dst_discard_sk;
1580 rt->dst.input = dst_discard; 1581 rt->dst.input = dst_discard;
1581 break; 1582 break;
1582 case RTN_PROHIBIT: 1583 case RTN_PROHIBIT:
@@ -2128,7 +2129,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
2128 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2129 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2129} 2130}
2130 2131
2131static int ip6_pkt_discard_out(struct sk_buff *skb) 2132static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2132{ 2133{
2133 skb->dev = skb_dst(skb)->dev; 2134 skb->dev = skb_dst(skb)->dev;
2134 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2135 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2139,7 +2140,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
2139 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2140 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2140} 2141}
2141 2142
2142static int ip6_pkt_prohibit_out(struct sk_buff *skb) 2143static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2143{ 2144{
2144 skb->dev = skb_dst(skb)->dev; 2145 skb->dev = skb_dst(skb)->dev;
2145 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2146 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1693c8d885f0..8da8268d65f8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -974,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
974 goto out; 974 goto out;
975 } 975 }
976 976
977 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, 977 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
978 ttl, df, !net_eq(tunnel->net, dev_net(dev))); 978 IPPROTO_IPV6, tos, ttl, df,
979 !net_eq(tunnel->net, dev_net(dev)));
979 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 980 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
980 return NETDEV_TX_OK; 981 return NETDEV_TX_OK;
981 982
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6cd625e37706..19ef329bdbf8 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -163,7 +163,7 @@ static int __xfrm6_output(struct sk_buff *skb)
163 return x->outer_mode->afinfo->output_finish(skb); 163 return x->outer_mode->afinfo->output_finish(skb);
164} 164}
165 165
166int xfrm6_output(struct sk_buff *skb) 166int xfrm6_output(struct sock *sk, struct sk_buff *skb)
167{ 167{
168 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, 168 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
169 skb_dst(skb)->dev, __xfrm6_output); 169 skb_dst(skb)->dev, __xfrm6_output);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 47f7a5490555..a4e37d7158dc 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1131,10 +1131,10 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
1131 skb->local_df = 1; 1131 skb->local_df = 1;
1132#if IS_ENABLED(CONFIG_IPV6) 1132#if IS_ENABLED(CONFIG_IPV6)
1133 if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) 1133 if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
1134 error = inet6_csk_xmit(skb, NULL); 1134 error = inet6_csk_xmit(tunnel->sock, skb, NULL);
1135 else 1135 else
1136#endif 1136#endif
1137 error = ip_queue_xmit(skb, fl); 1137 error = ip_queue_xmit(tunnel->sock, skb, fl);
1138 1138
1139 /* Update stats */ 1139 /* Update stats */
1140 if (error >= 0) { 1140 if (error >= 0) {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0b44d855269c..3397fe6897c0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -487,7 +487,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
487 487
488xmit: 488xmit:
489 /* Queue the packet to IP for output */ 489 /* Queue the packet to IP for output */
490 rc = ip_queue_xmit(skb, &inet->cork.fl); 490 rc = ip_queue_xmit(sk, skb, &inet->cork.fl);
491 rcu_read_unlock(); 491 rcu_read_unlock();
492 492
493error: 493error:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6dba48efe01e..75421f2ba8be 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1795,6 +1795,7 @@ int nf_conntrack_init_net(struct net *net)
1795 int cpu; 1795 int cpu;
1796 1796
1797 atomic_set(&net->ct.count, 0); 1797 atomic_set(&net->ct.count, 0);
1798 seqcount_init(&net->ct.generation);
1798 1799
1799 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); 1800 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
1800 if (!net->ct.pcpu_lists) 1801 if (!net->ct.pcpu_lists)
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7bd03decd36c..825c3e3f8305 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -605,32 +605,14 @@ static struct nf_conntrack_helper pptp __read_mostly = {
605 .expect_policy = &pptp_exp_policy, 605 .expect_policy = &pptp_exp_policy,
606}; 606};
607 607
608static void nf_conntrack_pptp_net_exit(struct net *net)
609{
610 nf_ct_gre_keymap_flush(net);
611}
612
613static struct pernet_operations nf_conntrack_pptp_net_ops = {
614 .exit = nf_conntrack_pptp_net_exit,
615};
616
617static int __init nf_conntrack_pptp_init(void) 608static int __init nf_conntrack_pptp_init(void)
618{ 609{
619 int rv; 610 return nf_conntrack_helper_register(&pptp);
620
621 rv = nf_conntrack_helper_register(&pptp);
622 if (rv < 0)
623 return rv;
624 rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
625 if (rv < 0)
626 nf_conntrack_helper_unregister(&pptp);
627 return rv;
628} 611}
629 612
630static void __exit nf_conntrack_pptp_fini(void) 613static void __exit nf_conntrack_pptp_fini(void)
631{ 614{
632 nf_conntrack_helper_unregister(&pptp); 615 nf_conntrack_helper_unregister(&pptp);
633 unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
634} 616}
635 617
636module_init(nf_conntrack_pptp_init); 618module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9d9c0dade602..d5665739e3b1 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -66,7 +66,7 @@ static inline struct netns_proto_gre *gre_pernet(struct net *net)
66 return net_generic(net, proto_gre_net_id); 66 return net_generic(net, proto_gre_net_id);
67} 67}
68 68
69void nf_ct_gre_keymap_flush(struct net *net) 69static void nf_ct_gre_keymap_flush(struct net *net)
70{ 70{
71 struct netns_proto_gre *net_gre = gre_pernet(net); 71 struct netns_proto_gre *net_gre = gre_pernet(net);
72 struct nf_ct_gre_keymap *km, *tmp; 72 struct nf_ct_gre_keymap *km, *tmp;
@@ -78,7 +78,6 @@ void nf_ct_gre_keymap_flush(struct net *net)
78 } 78 }
79 write_unlock_bh(&net_gre->keymap_lock); 79 write_unlock_bh(&net_gre->keymap_lock);
80} 80}
81EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
82 81
83static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, 82static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
84 const struct nf_conntrack_tuple *t) 83 const struct nf_conntrack_tuple *t)
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 90998a6ff8b9..804105391b9a 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
25 struct nft_data data[NFT_REG_MAX + 1]) 25 struct nft_data data[NFT_REG_MAX + 1])
26{ 26{
27 const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); 27 const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
28 u32 mask; 28 u32 mask = nft_cmp_fast_mask(priv->len);
29 29
30 mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
31 if ((data[priv->sreg].data[0] & mask) == priv->data) 30 if ((data[priv->sreg].data[0] & mask) == priv->data)
32 return; 31 return;
33 data[NFT_REG_VERDICT].verdict = NFT_BREAK; 32 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 954925db414d..e2b3f51c81f1 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
128 BUG_ON(err < 0); 128 BUG_ON(err < 0);
129 desc.len *= BITS_PER_BYTE; 129 desc.len *= BITS_PER_BYTE;
130 130
131 mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); 131 mask = nft_cmp_fast_mask(desc.len);
132 priv->data = data.data[0] & mask; 132 priv->data = data.data[0] & mask;
133 priv->len = desc.len; 133 priv->len = desc.len;
134 return 0; 134 return 0;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index a3d6951602db..ebb6e2442554 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -174,7 +174,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
174 174
175 skb->local_df = 1; 175 skb->local_df = 1;
176 176
177 return iptunnel_xmit(rt, skb, fl.saddr, 177 return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
178 OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, 178 OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
179 OVS_CB(skb)->tun_key->ipv4_tos, 179 OVS_CB(skb)->tun_key->ipv4_tos,
180 OVS_CB(skb)->tun_key->ipv4_ttl, df, false); 180 OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 4f6d6f9d1274..39579c3e0d14 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1395,35 +1395,44 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
1395 return false; 1395 return false;
1396} 1396}
1397 1397
1398/* Update asoc's rwnd for the approximated state in the buffer, 1398/* Increase asoc's rwnd by len and send any window update SACK if needed. */
1399 * and check whether SACK needs to be sent. 1399void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
1400 */
1401void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
1402{ 1400{
1403 int rx_count;
1404 struct sctp_chunk *sack; 1401 struct sctp_chunk *sack;
1405 struct timer_list *timer; 1402 struct timer_list *timer;
1406 1403
1407 if (asoc->ep->rcvbuf_policy) 1404 if (asoc->rwnd_over) {
1408 rx_count = atomic_read(&asoc->rmem_alloc); 1405 if (asoc->rwnd_over >= len) {
1409 else 1406 asoc->rwnd_over -= len;
1410 rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); 1407 } else {
1408 asoc->rwnd += (len - asoc->rwnd_over);
1409 asoc->rwnd_over = 0;
1410 }
1411 } else {
1412 asoc->rwnd += len;
1413 }
1411 1414
1412 if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0) 1415 /* If we had window pressure, start recovering it
1413 asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1; 1416 * once our rwnd had reached the accumulated pressure
1414 else 1417 * threshold. The idea is to recover slowly, but up
1415 asoc->rwnd = 0; 1418 * to the initial advertised window.
1419 */
1420 if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
1421 int change = min(asoc->pathmtu, asoc->rwnd_press);
1422 asoc->rwnd += change;
1423 asoc->rwnd_press -= change;
1424 }
1416 1425
1417 pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n", 1426 pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
1418 __func__, asoc, asoc->rwnd, rx_count, 1427 __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
1419 asoc->base.sk->sk_rcvbuf); 1428 asoc->a_rwnd);
1420 1429
1421 /* Send a window update SACK if the rwnd has increased by at least the 1430 /* Send a window update SACK if the rwnd has increased by at least the
1422 * minimum of the association's PMTU and half of the receive buffer. 1431 * minimum of the association's PMTU and half of the receive buffer.
1423 * The algorithm used is similar to the one described in 1432 * The algorithm used is similar to the one described in
1424 * Section 4.2.3.3 of RFC 1122. 1433 * Section 4.2.3.3 of RFC 1122.
1425 */ 1434 */
1426 if (update_peer && sctp_peer_needs_update(asoc)) { 1435 if (sctp_peer_needs_update(asoc)) {
1427 asoc->a_rwnd = asoc->rwnd; 1436 asoc->a_rwnd = asoc->rwnd;
1428 1437
1429 pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " 1438 pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1445,6 +1454,45 @@ void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
1445 } 1454 }
1446} 1455}
1447 1456
1457/* Decrease asoc's rwnd by len. */
1458void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
1459{
1460 int rx_count;
1461 int over = 0;
1462
1463 if (unlikely(!asoc->rwnd || asoc->rwnd_over))
1464 pr_debug("%s: association:%p has asoc->rwnd:%u, "
1465 "asoc->rwnd_over:%u!\n", __func__, asoc,
1466 asoc->rwnd, asoc->rwnd_over);
1467
1468 if (asoc->ep->rcvbuf_policy)
1469 rx_count = atomic_read(&asoc->rmem_alloc);
1470 else
1471 rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
1472
1473 /* If we've reached or overflowed our receive buffer, announce
1474 * a 0 rwnd if rwnd would still be positive. Store the
1475 * the potential pressure overflow so that the window can be restored
1476 * back to original value.
1477 */
1478 if (rx_count >= asoc->base.sk->sk_rcvbuf)
1479 over = 1;
1480
1481 if (asoc->rwnd >= len) {
1482 asoc->rwnd -= len;
1483 if (over) {
1484 asoc->rwnd_press += asoc->rwnd;
1485 asoc->rwnd = 0;
1486 }
1487 } else {
1488 asoc->rwnd_over = len - asoc->rwnd;
1489 asoc->rwnd = 0;
1490 }
1491
1492 pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
1493 __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
1494 asoc->rwnd_press);
1495}
1448 1496
1449/* Build the bind address list for the association based on info from the 1497/* Build the bind address list for the association based on info from the
1450 * local endpoint and the remote peer. 1498 * local endpoint and the remote peer.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4e1d0fcb028e..c09757fbf803 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -957,7 +957,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
957 957
958 SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); 958 SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
959 959
960 return ip_queue_xmit(skb, &transport->fl); 960 return ip_queue_xmit(&inet->sk, skb, &transport->fl);
961} 961}
962 962
963static struct sctp_af sctp_af_inet; 963static struct sctp_af sctp_af_inet;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 01e002430c85..ae9fbeba40b0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6178,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6178 * PMTU. In cases, such as loopback, this might be a rather 6178 * PMTU. In cases, such as loopback, this might be a rather
6179 * large spill over. 6179 * large spill over.
6180 */ 6180 */
6181 if ((!chunk->data_accepted) && (!asoc->rwnd || 6181 if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
6182 (datalen > asoc->rwnd + asoc->frag_point))) { 6182 (datalen > asoc->rwnd + asoc->frag_point))) {
6183 6183
6184 /* If this is the next TSN, consider reneging to make 6184 /* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e13519e9df80..ff20e2dbbbc7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2115,6 +2115,12 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
2115 sctp_skb_pull(skb, copied); 2115 sctp_skb_pull(skb, copied);
2116 skb_queue_head(&sk->sk_receive_queue, skb); 2116 skb_queue_head(&sk->sk_receive_queue, skb);
2117 2117
2118 /* When only partial message is copied to the user, increase
2119 * rwnd by that amount. If all the data in the skb is read,
2120 * rwnd is updated when the event is freed.
2121 */
2122 if (!sctp_ulpevent_is_notification(event))
2123 sctp_assoc_rwnd_increase(event->asoc, copied);
2118 goto out; 2124 goto out;
2119 } else if ((event->msg_flags & MSG_NOTIFICATION) || 2125 } else if ((event->msg_flags & MSG_NOTIFICATION) ||
2120 (event->msg_flags & MSG_EOR)) 2126 (event->msg_flags & MSG_EOR))
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 8d198ae03606..85c64658bd0b 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
989 skb = sctp_event2skb(event); 989 skb = sctp_event2skb(event);
990 /* Set the owner and charge rwnd for bytes received. */ 990 /* Set the owner and charge rwnd for bytes received. */
991 sctp_ulpevent_set_owner(event, asoc); 991 sctp_ulpevent_set_owner(event, asoc);
992 sctp_assoc_rwnd_update(asoc, false); 992 sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
993 993
994 if (!skb->data_len) 994 if (!skb->data_len)
995 return; 995 return;
@@ -1011,7 +1011,6 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
1011{ 1011{
1012 struct sk_buff *skb, *frag; 1012 struct sk_buff *skb, *frag;
1013 unsigned int len; 1013 unsigned int len;
1014 struct sctp_association *asoc;
1015 1014
1016 /* Current stack structures assume that the rcv buffer is 1015 /* Current stack structures assume that the rcv buffer is
1017 * per socket. For UDP style sockets this is not true as 1016 * per socket. For UDP style sockets this is not true as
@@ -1036,11 +1035,8 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
1036 } 1035 }
1037 1036
1038done: 1037done:
1039 asoc = event->asoc; 1038 sctp_assoc_rwnd_increase(event->asoc, len);
1040 sctp_association_hold(asoc);
1041 sctp_ulpevent_release_owner(event); 1039 sctp_ulpevent_release_owner(event);
1042 sctp_assoc_rwnd_update(asoc, true);
1043 sctp_association_put(asoc);
1044} 1040}
1045 1041
1046static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) 1042static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f02f511b7107..c08fbd11ceff 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1842,7 +1842,7 @@ purge_queue:
1842 xfrm_pol_put(pol); 1842 xfrm_pol_put(pol);
1843} 1843}
1844 1844
1845static int xdst_queue_output(struct sk_buff *skb) 1845static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
1846{ 1846{
1847 unsigned long sched_next; 1847 unsigned long sched_next;
1848 struct dst_entry *dst = skb_dst(skb); 1848 struct dst_entry *dst = skb_dst(skb);