aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-09-06 19:48:59 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-09-06 19:48:59 -0400
commitfff34b3412b9401a76ba9d021db1bd91cb0e02b6 (patch)
tree870ed2d1555004e7939d15b5099017aae61c97b8 /net/ipv4
parent28e1e58fb668e262648fb8ee8a24154633f40507 (diff)
parent636802ef96eebe279b22ad9f9dacfe29291e45c7 (diff)
Merge branch 'merge' into next
Brings in various bug fixes from 3.6-rcX
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ipmr.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c14
-rw-r--r--net/ipv4/route.c11
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c19
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c37
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c2
15 files changed, 108 insertions, 58 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f0cdb30921c0..57bd978483e1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -367,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head)
367 367
368static inline void free_leaf(struct leaf *l) 368static inline void free_leaf(struct leaf *l)
369{ 369{
370 call_rcu_bh(&l->rcu, __leaf_free_rcu); 370 call_rcu(&l->rcu, __leaf_free_rcu);
371} 371}
372 372
373static inline void free_leaf_info(struct leaf_info *leaf) 373static inline void free_leaf_info(struct leaf_info *leaf)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17c00f7..7f75f21d7b83 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
404{ 404{
405 const struct inet_request_sock *ireq = inet_rsk(req); 405 const struct inet_request_sock *ireq = inet_rsk(req);
406 struct inet_sock *newinet = inet_sk(newsk); 406 struct inet_sock *newinet = inet_sk(newsk);
407 struct ip_options_rcu *opt = ireq->opt; 407 struct ip_options_rcu *opt;
408 struct net *net = sock_net(sk); 408 struct net *net = sock_net(sk);
409 struct flowi4 *fl4; 409 struct flowi4 *fl4;
410 struct rtable *rt; 410 struct rtable *rt;
411 411
412 fl4 = &newinet->cork.fl.u.ip4; 412 fl4 = &newinet->cork.fl.u.ip4;
413
414 rcu_read_lock();
415 opt = rcu_dereference(newinet->inet_opt);
413 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 416 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
414 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 417 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
415 sk->sk_protocol, inet_sk_flowi_flags(sk), 418 sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
421 goto no_route; 424 goto no_route;
422 if (opt && opt->opt.is_strictroute && rt->rt_gateway) 425 if (opt && opt->opt.is_strictroute && rt->rt_gateway)
423 goto route_err; 426 goto route_err;
427 rcu_read_unlock();
424 return &rt->dst; 428 return &rt->dst;
425 429
426route_err: 430route_err:
427 ip_rt_put(rt); 431 ip_rt_put(rt);
428no_route: 432no_route:
433 rcu_read_unlock();
429 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 434 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
430 return NULL; 435 return NULL;
431} 436}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ba39a52d18c1..c196d749daf2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
197 neigh = __ipv4_neigh_lookup_noref(dev, nexthop); 197 neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
198 if (unlikely(!neigh)) 198 if (unlikely(!neigh))
199 neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); 199 neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
200 if (neigh) { 200 if (!IS_ERR(neigh)) {
201 int res = dst_neigh_output(dst, neigh, skb); 201 int res = dst_neigh_output(dst, neigh, skb);
202 202
203 rcu_read_unlock_bh(); 203 rcu_read_unlock_bh();
@@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1338 iph->ihl = 5; 1338 iph->ihl = 5;
1339 iph->tos = inet->tos; 1339 iph->tos = inet->tos;
1340 iph->frag_off = df; 1340 iph->frag_off = df;
1341 ip_select_ident(iph, &rt->dst, sk);
1342 iph->ttl = ttl; 1341 iph->ttl = ttl;
1343 iph->protocol = sk->sk_protocol; 1342 iph->protocol = sk->sk_protocol;
1344 ip_copy_addrs(iph, fl4); 1343 ip_copy_addrs(iph, fl4);
1344 ip_select_ident(iph, &rt->dst, sk);
1345 1345
1346 if (opt) { 1346 if (opt) {
1347 iph->ihl += opt->optlen>>2; 1347 iph->ihl += opt->optlen>>2;
@@ -1366,9 +1366,8 @@ out:
1366 return skb; 1366 return skb;
1367} 1367}
1368 1368
1369int ip_send_skb(struct sk_buff *skb) 1369int ip_send_skb(struct net *net, struct sk_buff *skb)
1370{ 1370{
1371 struct net *net = sock_net(skb->sk);
1372 int err; 1371 int err;
1373 1372
1374 err = ip_local_out(skb); 1373 err = ip_local_out(skb);
@@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
1391 return 0; 1390 return 0;
1392 1391
1393 /* Netfilter gets whole the not fragmented skb. */ 1392 /* Netfilter gets whole the not fragmented skb. */
1394 return ip_send_skb(skb); 1393 return ip_send_skb(sock_net(sk), skb);
1395} 1394}
1396 1395
1397/* 1396/*
@@ -1536,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
1536 arg->csumoffset) = csum_fold(csum_add(nskb->csum, 1535 arg->csumoffset) = csum_fold(csum_add(nskb->csum,
1537 arg->csum)); 1536 arg->csum));
1538 nskb->ip_summed = CHECKSUM_NONE; 1537 nskb->ip_summed = CHECKSUM_NONE;
1538 skb_orphan(nskb);
1539 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); 1539 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
1540 ip_push_pending_frames(sk, &fl4); 1540 ip_push_pending_frames(sk, &fl4);
1541 } 1541 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4a0536..ebdf06f938bf 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
124static struct kmem_cache *mrt_cachep __read_mostly; 124static struct kmem_cache *mrt_cachep __read_mostly;
125 125
126static struct mr_table *ipmr_new_table(struct net *net, u32 id); 126static struct mr_table *ipmr_new_table(struct net *net, u32 id);
127static void ipmr_free_table(struct mr_table *mrt);
128
127static int ip_mr_forward(struct net *net, struct mr_table *mrt, 129static int ip_mr_forward(struct net *net, struct mr_table *mrt,
128 struct sk_buff *skb, struct mfc_cache *cache, 130 struct sk_buff *skb, struct mfc_cache *cache,
129 int local); 131 int local);
@@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
131 struct sk_buff *pkt, vifi_t vifi, int assert); 133 struct sk_buff *pkt, vifi_t vifi, int assert);
132static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 134static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
133 struct mfc_cache *c, struct rtmsg *rtm); 135 struct mfc_cache *c, struct rtmsg *rtm);
136static void mroute_clean_tables(struct mr_table *mrt);
134static void ipmr_expire_process(unsigned long arg); 137static void ipmr_expire_process(unsigned long arg);
135 138
136#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 139#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net)
271 274
272 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 275 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
273 list_del(&mrt->list); 276 list_del(&mrt->list);
274 kfree(mrt); 277 ipmr_free_table(mrt);
275 } 278 }
276 fib_rules_unregister(net->ipv4.mr_rules_ops); 279 fib_rules_unregister(net->ipv4.mr_rules_ops);
277} 280}
@@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net)
299 302
300static void __net_exit ipmr_rules_exit(struct net *net) 303static void __net_exit ipmr_rules_exit(struct net *net)
301{ 304{
302 kfree(net->ipv4.mrt); 305 ipmr_free_table(net->ipv4.mrt);
303} 306}
304#endif 307#endif
305 308
@@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
336 return mrt; 339 return mrt;
337} 340}
338 341
342static void ipmr_free_table(struct mr_table *mrt)
343{
344 del_timer_sync(&mrt->ipmr_expire_timer);
345 mroute_clean_tables(mrt);
346 kfree(mrt);
347}
348
339/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 349/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
340 350
341static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 351static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a23813d26..9c87cde28ff8 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
148 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, 148 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
149 hdr, NULL, &matchoff, &matchlen, 149 hdr, NULL, &matchoff, &matchlen,
150 &addr, &port) > 0) { 150 &addr, &port) > 0) {
151 unsigned int matchend, poff, plen, buflen, n; 151 unsigned int olen, matchend, poff, plen, buflen, n;
152 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; 152 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
153 153
154 /* We're only interested in headers related to this 154 /* We're only interested in headers related to this
@@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
163 goto next; 163 goto next;
164 } 164 }
165 165
166 olen = *datalen;
166 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, 167 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
167 &addr, port)) 168 &addr, port))
168 return NF_DROP; 169 return NF_DROP;
169 170
170 matchend = matchoff + matchlen; 171 matchend = matchoff + matchlen + *datalen - olen;
171 172
172 /* The maddr= parameter (RFC 2361) specifies where to send 173 /* The maddr= parameter (RFC 2361) specifies where to send
173 * the reply. */ 174 * the reply. */
174 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, 175 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
175 "maddr=", &poff, &plen, 176 "maddr=", &poff, &plen,
176 &addr) > 0 && 177 &addr, true) > 0 &&
177 addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && 178 addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
178 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { 179 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
179 buflen = sprintf(buffer, "%pI4", 180 buflen = sprintf(buffer, "%pI4",
@@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
187 * from which the server received the request. */ 188 * from which the server received the request. */
188 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, 189 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
189 "received=", &poff, &plen, 190 "received=", &poff, &plen,
190 &addr) > 0 && 191 &addr, false) > 0 &&
191 addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && 192 addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
192 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { 193 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
193 buflen = sprintf(buffer, "%pI4", 194 buflen = sprintf(buffer, "%pI4",
@@ -501,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
501 ret = nf_ct_expect_related(rtcp_exp); 502 ret = nf_ct_expect_related(rtcp_exp);
502 if (ret == 0) 503 if (ret == 0)
503 break; 504 break;
504 else if (ret != -EBUSY) { 505 else if (ret == -EBUSY) {
506 nf_ct_unexpect_related(rtp_exp);
507 continue;
508 } else if (ret < 0) {
505 nf_ct_unexpect_related(rtp_exp); 509 nf_ct_unexpect_related(rtp_exp);
506 port = 0; 510 port = 0;
507 break; 511 break;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c035251beb07..82cf2a722b23 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,7 +70,6 @@
70#include <linux/types.h> 70#include <linux/types.h>
71#include <linux/kernel.h> 71#include <linux/kernel.h>
72#include <linux/mm.h> 72#include <linux/mm.h>
73#include <linux/bootmem.h>
74#include <linux/string.h> 73#include <linux/string.h>
75#include <linux/socket.h> 74#include <linux/socket.h>
76#include <linux/sockios.h> 75#include <linux/sockios.h>
@@ -80,7 +79,6 @@
80#include <linux/netdevice.h> 79#include <linux/netdevice.h>
81#include <linux/proc_fs.h> 80#include <linux/proc_fs.h>
82#include <linux/init.h> 81#include <linux/init.h>
83#include <linux/workqueue.h>
84#include <linux/skbuff.h> 82#include <linux/skbuff.h>
85#include <linux/inetdevice.h> 83#include <linux/inetdevice.h>
86#include <linux/igmp.h> 84#include <linux/igmp.h>
@@ -88,11 +86,9 @@
88#include <linux/mroute.h> 86#include <linux/mroute.h>
89#include <linux/netfilter_ipv4.h> 87#include <linux/netfilter_ipv4.h>
90#include <linux/random.h> 88#include <linux/random.h>
91#include <linux/jhash.h>
92#include <linux/rcupdate.h> 89#include <linux/rcupdate.h>
93#include <linux/times.h> 90#include <linux/times.h>
94#include <linux/slab.h> 91#include <linux/slab.h>
95#include <linux/prefetch.h>
96#include <net/dst.h> 92#include <net/dst.h>
97#include <net/net_namespace.h> 93#include <net/net_namespace.h>
98#include <net/protocol.h> 94#include <net/protocol.h>
@@ -938,12 +934,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
938 if (mtu < ip_rt_min_pmtu) 934 if (mtu < ip_rt_min_pmtu)
939 mtu = ip_rt_min_pmtu; 935 mtu = ip_rt_min_pmtu;
940 936
937 rcu_read_lock();
941 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { 938 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
942 struct fib_nh *nh = &FIB_RES_NH(res); 939 struct fib_nh *nh = &FIB_RES_NH(res);
943 940
944 update_or_create_fnhe(nh, fl4->daddr, 0, mtu, 941 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
945 jiffies + ip_rt_mtu_expires); 942 jiffies + ip_rt_mtu_expires);
946 } 943 }
944 rcu_read_unlock();
947 return mtu; 945 return mtu;
948} 946}
949 947
@@ -960,7 +958,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
960 dst->obsolete = DST_OBSOLETE_KILL; 958 dst->obsolete = DST_OBSOLETE_KILL;
961 } else { 959 } else {
962 rt->rt_pmtu = mtu; 960 rt->rt_pmtu = mtu;
963 dst_set_expires(&rt->dst, ip_rt_mtu_expires); 961 rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
964 } 962 }
965} 963}
966 964
@@ -1267,7 +1265,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
1267{ 1265{
1268 struct rtable *rt = (struct rtable *) dst; 1266 struct rtable *rt = (struct rtable *) dst;
1269 1267
1270 if (dst->flags & DST_NOCACHE) { 1268 if (!list_empty(&rt->rt_uncached)) {
1271 spin_lock_bh(&rt_uncached_lock); 1269 spin_lock_bh(&rt_uncached_lock);
1272 list_del(&rt->rt_uncached); 1270 list_del(&rt->rt_uncached);
1273 spin_unlock_bh(&rt_uncached_lock); 1271 spin_unlock_bh(&rt_uncached_lock);
@@ -2032,7 +2030,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
2032 } 2030 }
2033 dev_out = net->loopback_dev; 2031 dev_out = net->loopback_dev;
2034 fl4->flowi4_oif = dev_out->ifindex; 2032 fl4->flowi4_oif = dev_out->ifindex;
2035 res.fi = NULL;
2036 flags |= RTCF_LOCAL; 2033 flags |= RTCF_LOCAL;
2037 goto make_route; 2034 goto make_route;
2038 } 2035 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e7e6eeae49c0..2109ff4a1daf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
811 old_size_goal + mss_now > xmit_size_goal)) { 811 old_size_goal + mss_now > xmit_size_goal)) {
812 xmit_size_goal = old_size_goal; 812 xmit_size_goal = old_size_goal;
813 } else { 813 } else {
814 tp->xmit_size_goal_segs = xmit_size_goal / mss_now; 814 tp->xmit_size_goal_segs =
815 min_t(u16, xmit_size_goal / mss_now,
816 sk->sk_gso_max_segs);
815 xmit_size_goal = tp->xmit_size_goal_segs * mss_now; 817 xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
816 } 818 }
817 } 819 }
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4d4db16e336e..1432cdb0644c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
291 left = tp->snd_cwnd - in_flight; 291 left = tp->snd_cwnd - in_flight;
292 if (sk_can_gso(sk) && 292 if (sk_can_gso(sk) &&
293 left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && 293 left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
294 left * tp->mss_cache < sk->sk_gso_max_size) 294 left * tp->mss_cache < sk->sk_gso_max_size &&
295 left < sk->sk_gso_max_segs)
295 return true; 296 return true;
296 return left <= tcp_max_tso_deferred_mss(tp); 297 return left <= tcp_max_tso_deferred_mss(tp);
297} 298}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2fd2bc9e3c64..6e38c6c23caa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2926 * tcp_xmit_retransmit_queue(). 2926 * tcp_xmit_retransmit_queue().
2927 */ 2927 */
2928static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, 2928static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2929 int newly_acked_sacked, bool is_dupack, 2929 int prior_sacked, bool is_dupack,
2930 int flag) 2930 int flag)
2931{ 2931{
2932 struct inet_connection_sock *icsk = inet_csk(sk); 2932 struct inet_connection_sock *icsk = inet_csk(sk);
2933 struct tcp_sock *tp = tcp_sk(sk); 2933 struct tcp_sock *tp = tcp_sk(sk);
2934 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 2934 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2935 (tcp_fackets_out(tp) > tp->reordering)); 2935 (tcp_fackets_out(tp) > tp->reordering));
2936 int newly_acked_sacked = 0;
2936 int fast_rexmit = 0; 2937 int fast_rexmit = 0;
2937 2938
2938 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 2939 if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2992 tcp_add_reno_sack(sk); 2993 tcp_add_reno_sack(sk);
2993 } else 2994 } else
2994 do_lost = tcp_try_undo_partial(sk, pkts_acked); 2995 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2996 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2995 break; 2997 break;
2996 case TCP_CA_Loss: 2998 case TCP_CA_Loss:
2997 if (flag & FLAG_DATA_ACKED) 2999 if (flag & FLAG_DATA_ACKED)
@@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3013 if (is_dupack) 3015 if (is_dupack)
3014 tcp_add_reno_sack(sk); 3016 tcp_add_reno_sack(sk);
3015 } 3017 }
3018 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
3016 3019
3017 if (icsk->icsk_ca_state <= TCP_CA_Disorder) 3020 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3018 tcp_try_undo_dsack(sk); 3021 tcp_try_undo_dsack(sk);
@@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3590 int prior_packets; 3593 int prior_packets;
3591 int prior_sacked = tp->sacked_out; 3594 int prior_sacked = tp->sacked_out;
3592 int pkts_acked = 0; 3595 int pkts_acked = 0;
3593 int newly_acked_sacked = 0;
3594 bool frto_cwnd = false; 3596 bool frto_cwnd = false;
3595 3597
3596 /* If the ack is older than previous acks 3598 /* If the ack is older than previous acks
@@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3666 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); 3668 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3667 3669
3668 pkts_acked = prior_packets - tp->packets_out; 3670 pkts_acked = prior_packets - tp->packets_out;
3669 newly_acked_sacked = (prior_packets - prior_sacked) -
3670 (tp->packets_out - tp->sacked_out);
3671 3671
3672 if (tp->frto_counter) 3672 if (tp->frto_counter)
3673 frto_cwnd = tcp_process_frto(sk, flag); 3673 frto_cwnd = tcp_process_frto(sk, flag);
@@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3681 tcp_may_raise_cwnd(sk, flag)) 3681 tcp_may_raise_cwnd(sk, flag))
3682 tcp_cong_avoid(sk, ack, prior_in_flight); 3682 tcp_cong_avoid(sk, ack, prior_in_flight);
3683 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3683 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3684 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, 3684 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3685 is_dupack, flag); 3685 is_dupack, flag);
3686 } else { 3686 } else {
3687 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3687 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
@@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3698no_queue: 3698no_queue:
3699 /* If data was DSACKed, see if we can undo a cwnd reduction. */ 3699 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3700 if (flag & FLAG_DSACKING_ACK) 3700 if (flag & FLAG_DSACKING_ACK)
3701 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, 3701 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3702 is_dupack, flag); 3702 is_dupack, flag);
3703 /* If this ack opens up a zero window, clear backoff. It was 3703 /* If this ack opens up a zero window, clear backoff. It was
3704 * being used to time the probes, and is probably far higher than 3704 * being used to time the probes, and is probably far higher than
@@ -3718,8 +3718,7 @@ old_ack:
3718 */ 3718 */
3719 if (TCP_SKB_CB(skb)->sacked) { 3719 if (TCP_SKB_CB(skb)->sacked) {
3720 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3720 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3721 newly_acked_sacked = tp->sacked_out - prior_sacked; 3721 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3722 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3723 is_dupack, flag); 3722 is_dupack, flag);
3724 } 3723 }
3725 3724
@@ -5392,6 +5391,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5392{ 5391{
5393 struct tcp_sock *tp = tcp_sk(sk); 5392 struct tcp_sock *tp = tcp_sk(sk);
5394 5393
5394 if (unlikely(sk->sk_rx_dst == NULL))
5395 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5395 /* 5396 /*
5396 * Header prediction. 5397 * Header prediction.
5397 * The code loosely follows the one in the famous 5398 * The code loosely follows the one in the famous
@@ -5605,7 +5606,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5605 tcp_set_state(sk, TCP_ESTABLISHED); 5606 tcp_set_state(sk, TCP_ESTABLISHED);
5606 5607
5607 if (skb != NULL) { 5608 if (skb != NULL) {
5608 inet_sk_rx_dst_set(sk, skb); 5609 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5609 security_inet_conn_established(sk, skb); 5610 security_inet_conn_established(sk, skb);
5610 } 5611 }
5611 5612
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 42b2a6a73092..00a748d14062 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
417 417
418 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 418 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
419 tp->mtu_info = info; 419 tp->mtu_info = info;
420 if (!sock_owned_by_user(sk)) 420 if (!sock_owned_by_user(sk)) {
421 tcp_v4_mtu_reduced(sk); 421 tcp_v4_mtu_reduced(sk);
422 else 422 } else {
423 set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); 423 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
424 sock_hold(sk);
425 }
424 goto out; 426 goto out;
425 } 427 }
426 428
@@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1462 goto exit_nonewsk; 1464 goto exit_nonewsk;
1463 1465
1464 newsk->sk_gso_type = SKB_GSO_TCPV4; 1466 newsk->sk_gso_type = SKB_GSO_TCPV4;
1467 inet_sk_rx_dst_set(newsk, skb);
1465 1468
1466 newtp = tcp_sk(newsk); 1469 newtp = tcp_sk(newsk);
1467 newinet = inet_sk(newsk); 1470 newinet = inet_sk(newsk);
@@ -1627,9 +1630,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1627 sk->sk_rx_dst = NULL; 1630 sk->sk_rx_dst = NULL;
1628 } 1631 }
1629 } 1632 }
1630 if (unlikely(sk->sk_rx_dst == NULL))
1631 inet_sk_rx_dst_set(sk, skb);
1632
1633 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1633 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1634 rsk = sk; 1634 rsk = sk;
1635 goto reset; 1635 goto reset;
@@ -1872,10 +1872,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
1872 .twsk_destructor= tcp_twsk_destructor, 1872 .twsk_destructor= tcp_twsk_destructor,
1873}; 1873};
1874 1874
1875void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1876{
1877 struct dst_entry *dst = skb_dst(skb);
1878
1879 dst_hold(dst);
1880 sk->sk_rx_dst = dst;
1881 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1882}
1883EXPORT_SYMBOL(inet_sk_rx_dst_set);
1884
1875const struct inet_connection_sock_af_ops ipv4_specific = { 1885const struct inet_connection_sock_af_ops ipv4_specific = {
1876 .queue_xmit = ip_queue_xmit, 1886 .queue_xmit = ip_queue_xmit,
1877 .send_check = tcp_v4_send_check, 1887 .send_check = tcp_v4_send_check,
1878 .rebuild_header = inet_sk_rebuild_header, 1888 .rebuild_header = inet_sk_rebuild_header,
1889 .sk_rx_dst_set = inet_sk_rx_dst_set,
1879 .conn_request = tcp_v4_conn_request, 1890 .conn_request = tcp_v4_conn_request,
1880 .syn_recv_sock = tcp_v4_syn_recv_sock, 1891 .syn_recv_sock = tcp_v4_syn_recv_sock,
1881 .net_header_len = sizeof(struct iphdr), 1892 .net_header_len = sizeof(struct iphdr),
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 2288a6399e1e..0abe67bb4d3a 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net)
731 731
732static void __net_exit tcp_net_metrics_exit(struct net *net) 732static void __net_exit tcp_net_metrics_exit(struct net *net)
733{ 733{
734 unsigned int i;
735
736 for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
737 struct tcp_metrics_block *tm, *next;
738
739 tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
740 while (tm) {
741 next = rcu_dereference_protected(tm->tcpm_next, 1);
742 kfree(tm);
743 tm = next;
744 }
745 }
734 kfree(net->ipv4.tcp_metrics_hash); 746 kfree(net->ipv4.tcp_metrics_hash);
735} 747}
736 748
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 232a90c3ec86..6ff7f10dce9d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
387 struct tcp_sock *oldtp = tcp_sk(sk); 387 struct tcp_sock *oldtp = tcp_sk(sk);
388 struct tcp_cookie_values *oldcvp = oldtp->cookie_values; 388 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
389 389
390 inet_sk_rx_dst_set(newsk, skb);
391
392 /* TCP Cookie Transactions require space for the cookie pair, 390 /* TCP Cookie Transactions require space for the cookie pair,
393 * as it differs for each connection. There is no need to 391 * as it differs for each connection. There is no need to
394 * copy any s_data_payload stored at the original socket. 392 * copy any s_data_payload stored at the original socket.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f1bcff0b10b..d04632673a9e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
910 if (flags & (1UL << TCP_TSQ_DEFERRED)) 910 if (flags & (1UL << TCP_TSQ_DEFERRED))
911 tcp_tsq_handler(sk); 911 tcp_tsq_handler(sk);
912 912
913 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) 913 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
914 tcp_write_timer_handler(sk); 914 tcp_write_timer_handler(sk);
915 915 __sock_put(sk);
916 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) 916 }
917 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
917 tcp_delack_timer_handler(sk); 918 tcp_delack_timer_handler(sk);
918 919 __sock_put(sk);
919 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) 920 }
921 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
920 sk->sk_prot->mtu_reduced(sk); 922 sk->sk_prot->mtu_reduced(sk);
923 __sock_put(sk);
924 }
921} 925}
922EXPORT_SYMBOL(tcp_release_cb); 926EXPORT_SYMBOL(tcp_release_cb);
923 927
@@ -940,7 +944,7 @@ void __init tcp_tasklet_init(void)
940 * We cant xmit new skbs from this context, as we might already 944 * We cant xmit new skbs from this context, as we might already
941 * hold qdisc lock. 945 * hold qdisc lock.
942 */ 946 */
943void tcp_wfree(struct sk_buff *skb) 947static void tcp_wfree(struct sk_buff *skb)
944{ 948{
945 struct sock *sk = skb->sk; 949 struct sock *sk = skb->sk;
946 struct tcp_sock *tp = tcp_sk(sk); 950 struct tcp_sock *tp = tcp_sk(sk);
@@ -1522,21 +1526,21 @@ static void tcp_cwnd_validate(struct sock *sk)
1522 * when we would be allowed to send the split-due-to-Nagle skb fully. 1526 * when we would be allowed to send the split-due-to-Nagle skb fully.
1523 */ 1527 */
1524static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, 1528static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1525 unsigned int mss_now, unsigned int cwnd) 1529 unsigned int mss_now, unsigned int max_segs)
1526{ 1530{
1527 const struct tcp_sock *tp = tcp_sk(sk); 1531 const struct tcp_sock *tp = tcp_sk(sk);
1528 u32 needed, window, cwnd_len; 1532 u32 needed, window, max_len;
1529 1533
1530 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1534 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1531 cwnd_len = mss_now * cwnd; 1535 max_len = mss_now * max_segs;
1532 1536
1533 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) 1537 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1534 return cwnd_len; 1538 return max_len;
1535 1539
1536 needed = min(skb->len, window); 1540 needed = min(skb->len, window);
1537 1541
1538 if (cwnd_len <= needed) 1542 if (max_len <= needed)
1539 return cwnd_len; 1543 return max_len;
1540 1544
1541 return needed - needed % mss_now; 1545 return needed - needed % mss_now;
1542} 1546}
@@ -1765,7 +1769,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1765 limit = min(send_win, cong_win); 1769 limit = min(send_win, cong_win);
1766 1770
1767 /* If a full-sized TSO skb can be sent, do it. */ 1771 /* If a full-sized TSO skb can be sent, do it. */
1768 if (limit >= sk->sk_gso_max_size) 1772 if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
1773 sk->sk_gso_max_segs * tp->mss_cache))
1769 goto send_now; 1774 goto send_now;
1770 1775
1771 /* Middle in queue won't get any more data, full sendable already? */ 1776 /* Middle in queue won't get any more data, full sendable already? */
@@ -1999,7 +2004,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1999 limit = mss_now; 2004 limit = mss_now;
2000 if (tso_segs > 1 && !tcp_urg_mode(tp)) 2005 if (tso_segs > 1 && !tcp_urg_mode(tp))
2001 limit = tcp_mss_split_point(sk, skb, mss_now, 2006 limit = tcp_mss_split_point(sk, skb, mss_now,
2002 cwnd_quota); 2007 min_t(unsigned int,
2008 cwnd_quota,
2009 sk->sk_gso_max_segs));
2003 2010
2004 if (skb->len > limit && 2011 if (skb->len > limit &&
2005 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 2012 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad55a38..b774a03bd1dc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
252 inet_csk(sk)->icsk_ack.blocked = 1; 252 inet_csk(sk)->icsk_ack.blocked = 1;
253 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 253 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
254 /* deleguate our work to tcp_release_cb() */ 254 /* deleguate our work to tcp_release_cb() */
255 set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); 255 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
256 sock_hold(sk);
256 } 257 }
257 bh_unlock_sock(sk); 258 bh_unlock_sock(sk);
258 sock_put(sk); 259 sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
481 tcp_write_timer_handler(sk); 482 tcp_write_timer_handler(sk);
482 } else { 483 } else {
483 /* deleguate our work to tcp_release_cb() */ 484 /* deleguate our work to tcp_release_cb() */
484 set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); 485 if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
486 sock_hold(sk);
485 } 487 }
486 bh_unlock_sock(sk); 488 bh_unlock_sock(sk);
487 sock_put(sk); 489 sock_put(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b4c3582a991f..6f6d1aca3c3d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
758 uh->check = CSUM_MANGLED_0; 758 uh->check = CSUM_MANGLED_0;
759 759
760send: 760send:
761 err = ip_send_skb(skb); 761 err = ip_send_skb(sock_net(sk), skb);
762 if (err) { 762 if (err) {
763 if (err == -ENOBUFS && !inet->recverr) { 763 if (err == -ENOBUFS && !inet->recverr) {
764 UDP_INC_STATS_USER(sock_net(sk), 764 UDP_INC_STATS_USER(sock_net(sk),