aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/ip_forward.c71
-rw-r--r--net/ipv4/ip_tunnel.c29
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c5
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c75
-rw-r--r--net/ipv4/route.c13
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/udp_offload.c17
13 files changed, 209 insertions, 47 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
1443 + nla_total_size(4) /* IFA_LOCAL */ 1443 + nla_total_size(4) /* IFA_LOCAL */
1444 + nla_total_size(4) /* IFA_BROADCAST */ 1444 + nla_total_size(4) /* IFA_BROADCAST */
1445 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ 1445 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1446 + nla_total_size(4); /* IFA_FLAGS */ 1446 + nla_total_size(4) /* IFA_FLAGS */
1447 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1447} 1448}
1448 1449
1449static inline u32 cstamp_delta(unsigned long cstamp) 1450static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217a8afd..f3869c186d97 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
39#include <net/route.h> 39#include <net/route.h>
40#include <net/xfrm.h> 40#include <net/xfrm.h>
41 41
42static bool ip_may_fragment(const struct sk_buff *skb)
43{
44 return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
45 !skb->local_df;
46}
47
48static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
49{
50 if (skb->len <= mtu || skb->local_df)
51 return false;
52
53 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
54 return false;
55
56 return true;
57}
58
59static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
60{
61 unsigned int mtu;
62
63 if (skb->local_df || !skb_is_gso(skb))
64 return false;
65
66 mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
67
68 /* if seglen > mtu, do software segmentation for IP fragmentation on
69 * output. DF bit cannot be set since ip_forward would have sent
70 * icmp error.
71 */
72 return skb_gso_network_seglen(skb) > mtu;
73}
74
75/* called if GSO skb needs to be fragmented on forward */
76static int ip_forward_finish_gso(struct sk_buff *skb)
77{
78 struct dst_entry *dst = skb_dst(skb);
79 netdev_features_t features;
80 struct sk_buff *segs;
81 int ret = 0;
82
83 features = netif_skb_dev_features(skb, dst->dev);
84 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
85 if (IS_ERR(segs)) {
86 kfree_skb(skb);
87 return -ENOMEM;
88 }
89
90 consume_skb(skb);
91
92 do {
93 struct sk_buff *nskb = segs->next;
94 int err;
95
96 segs->next = NULL;
97 err = dst_output(segs);
98
99 if (err && ret == 0)
100 ret = err;
101 segs = nskb;
102 } while (segs);
103
104 return ret;
105}
106
42static int ip_forward_finish(struct sk_buff *skb) 107static int ip_forward_finish(struct sk_buff *skb)
43{ 108{
44 struct ip_options *opt = &(IPCB(skb)->opt); 109 struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
49 if (unlikely(opt->optlen)) 114 if (unlikely(opt->optlen))
50 ip_forward_options(skb); 115 ip_forward_options(skb);
51 116
117 if (ip_gso_exceeds_dst_mtu(skb))
118 return ip_forward_finish_gso(skb);
119
52 return dst_output(skb); 120 return dst_output(skb);
53} 121}
54 122
@@ -91,8 +159,7 @@ int ip_forward(struct sk_buff *skb)
91 159
92 IPCB(skb)->flags |= IPSKB_FORWARDED; 160 IPCB(skb)->flags |= IPSKB_FORWARDED;
93 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); 161 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
94 if (unlikely(skb->len > mtu && !skb_is_gso(skb) && 162 if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
95 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
96 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); 163 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
97 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 164 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
98 htonl(mtu)); 165 htonl(mtu));
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..50228be5c17b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -101,28 +101,22 @@ static void tunnel_dst_reset_all(struct ip_tunnel *t)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); 101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102} 102}
103 103
104static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) 104static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
105{ 105{
106 struct dst_entry *dst; 106 struct dst_entry *dst;
107 107
108 rcu_read_lock(); 108 rcu_read_lock();
109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); 109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
110 if (dst) 110 if (dst) {
111 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
112 rcu_read_unlock();
113 tunnel_dst_reset(t);
114 return NULL;
115 }
111 dst_hold(dst); 116 dst_hold(dst);
112 rcu_read_unlock();
113 return dst;
114}
115
116static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
117{
118 struct dst_entry *dst = tunnel_dst_get(t);
119
120 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
121 tunnel_dst_reset(t);
122 return NULL;
123 } 117 }
124 118 rcu_read_unlock();
125 return dst; 119 return (struct rtable *)dst;
126} 120}
127 121
128/* Often modified stats are per cpu, other are shared (netdev->stats) */ 122/* Often modified stats are per cpu, other are shared (netdev->stats) */
@@ -584,7 +578,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
584 struct flowi4 fl4; 578 struct flowi4 fl4;
585 u8 tos, ttl; 579 u8 tos, ttl;
586 __be16 df; 580 __be16 df;
587 struct rtable *rt = NULL; /* Route to the other host */ 581 struct rtable *rt; /* Route to the other host */
588 unsigned int max_headroom; /* The extra header space needed */ 582 unsigned int max_headroom; /* The extra header space needed */
589 __be32 dst; 583 __be32 dst;
590 int err; 584 int err;
@@ -657,8 +651,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
657 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 651 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
658 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 652 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
659 653
660 if (connected) 654 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
661 rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
662 655
663 if (!rt) { 656 if (!rt) {
664 rt = ip_route_output_key(tunnel->net, &fl4); 657 rt = ip_route_output_key(tunnel->net, &fl4);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138fa523..b3e86ea7b71b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -273,7 +273,7 @@ static int __init ic_open_devs(void)
273 273
274 msleep(1); 274 msleep(1);
275 275
276 if time_before(jiffies, next_msg) 276 if (time_before(jiffies, next_msg))
277 continue; 277 continue;
278 278
279 elapsed = jiffies_to_msecs(jiffies - start); 279 elapsed = jiffies_to_msecs(jiffies - start);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
61 packet transformations such as the source, destination address and 61 packet transformations such as the source, destination address and
62 source and destination ports. 62 source and destination ports.
63 63
64config NFT_REJECT_IPV4
65 depends on NF_TABLES_IPV4
66 default NFT_REJECT
67 tristate
68
64config NF_TABLES_ARP 69config NF_TABLES_ARP
65 depends on NF_TABLES 70 depends on NF_TABLES
66 tristate "ARP nf_tables support" 71 tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
30obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o 30obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
31obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o 31obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
32obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o 32obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
33obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
33obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o 34obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
34 35
35# generic IP tables 36# generic IP tables
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
229 ret = nf_ct_expect_related(rtcp_exp); 229 ret = nf_ct_expect_related(rtcp_exp);
230 if (ret == 0) 230 if (ret == 0)
231 break; 231 break;
232 else if (ret != -EBUSY) { 232 else if (ret == -EBUSY) {
233 nf_ct_unexpect_related(rtp_exp);
234 continue;
235 } else if (ret < 0) {
233 nf_ct_unexpect_related(rtp_exp); 236 nf_ct_unexpect_related(rtp_exp);
234 nated_port = 0; 237 nated_port = 0;
235 break; 238 break;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e79718a382f2
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2013 Eric Leblond <eric@regit.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
10 */
11
12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables.h>
19#include <net/icmp.h>
20#include <net/netfilter/ipv4/nf_reject.h>
21#include <net/netfilter/nft_reject.h>
22
23void nft_reject_ipv4_eval(const struct nft_expr *expr,
24 struct nft_data data[NFT_REG_MAX + 1],
25 const struct nft_pktinfo *pkt)
26{
27 struct nft_reject *priv = nft_expr_priv(expr);
28
29 switch (priv->type) {
30 case NFT_REJECT_ICMP_UNREACH:
31 nf_send_unreach(pkt->skb, priv->icmp_code);
32 break;
33 case NFT_REJECT_TCP_RST:
34 nf_send_reset(pkt->skb, pkt->ops->hooknum);
35 break;
36 }
37
38 data[NFT_REG_VERDICT].verdict = NF_DROP;
39}
40EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
41
42static struct nft_expr_type nft_reject_ipv4_type;
43static const struct nft_expr_ops nft_reject_ipv4_ops = {
44 .type = &nft_reject_ipv4_type,
45 .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
46 .eval = nft_reject_ipv4_eval,
47 .init = nft_reject_init,
48 .dump = nft_reject_dump,
49};
50
51static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
52 .family = NFPROTO_IPV4,
53 .name = "reject",
54 .ops = &nft_reject_ipv4_ops,
55 .policy = nft_reject_policy,
56 .maxattr = NFTA_REJECT_MAX,
57 .owner = THIS_MODULE,
58};
59
60static int __init nft_reject_ipv4_module_init(void)
61{
62 return nft_register_expr(&nft_reject_ipv4_type);
63}
64
65static void __exit nft_reject_ipv4_module_exit(void)
66{
67 nft_unregister_expr(&nft_reject_ipv4_type);
68}
69
70module_init(nft_reject_ipv4_module_init);
71module_exit(nft_reject_ipv4_module_exit);
72
73MODULE_LICENSE("GPL");
74MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
75MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 25071b48921c..4c011ec69ed4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1597,6 +1597,7 @@ static int __mkroute_input(struct sk_buff *skb,
1597 rth->rt_gateway = 0; 1597 rth->rt_gateway = 0;
1598 rth->rt_uses_gateway = 0; 1598 rth->rt_uses_gateway = 0;
1599 INIT_LIST_HEAD(&rth->rt_uncached); 1599 INIT_LIST_HEAD(&rth->rt_uncached);
1600 RT_CACHE_STAT_INC(in_slow_tot);
1600 1601
1601 rth->dst.input = ip_forward; 1602 rth->dst.input = ip_forward;
1602 rth->dst.output = ip_output; 1603 rth->dst.output = ip_output;
@@ -1695,10 +1696,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1695 fl4.daddr = daddr; 1696 fl4.daddr = daddr;
1696 fl4.saddr = saddr; 1697 fl4.saddr = saddr;
1697 err = fib_lookup(net, &fl4, &res); 1698 err = fib_lookup(net, &fl4, &res);
1698 if (err != 0) 1699 if (err != 0) {
1700 if (!IN_DEV_FORWARD(in_dev))
1701 err = -EHOSTUNREACH;
1699 goto no_route; 1702 goto no_route;
1700 1703 }
1701 RT_CACHE_STAT_INC(in_slow_tot);
1702 1704
1703 if (res.type == RTN_BROADCAST) 1705 if (res.type == RTN_BROADCAST)
1704 goto brd_input; 1706 goto brd_input;
@@ -1712,8 +1714,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1712 goto local_input; 1714 goto local_input;
1713 } 1715 }
1714 1716
1715 if (!IN_DEV_FORWARD(in_dev)) 1717 if (!IN_DEV_FORWARD(in_dev)) {
1718 err = -EHOSTUNREACH;
1716 goto no_route; 1719 goto no_route;
1720 }
1717 if (res.type != RTN_UNICAST) 1721 if (res.type != RTN_UNICAST)
1718 goto martian_destination; 1722 goto martian_destination;
1719 1723
@@ -1768,6 +1772,7 @@ local_input:
1768 rth->rt_gateway = 0; 1772 rth->rt_gateway = 0;
1769 rth->rt_uses_gateway = 0; 1773 rth->rt_uses_gateway = 0;
1770 INIT_LIST_HEAD(&rth->rt_uncached); 1774 INIT_LIST_HEAD(&rth->rt_uncached);
1775 RT_CACHE_STAT_INC(in_slow_tot);
1771 if (res.type == RTN_UNREACHABLE) { 1776 if (res.type == RTN_UNREACHABLE) {
1772 rth->dst.input= ip_error; 1777 rth->dst.input= ip_error;
1773 rth->dst.error= -err; 1778 rth->dst.error= -err;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..9f3a2db9109e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,7 +2229,7 @@ adjudge_to_death:
2229 /* This is a (useful) BSD violating of the RFC. There is a 2229 /* This is a (useful) BSD violating of the RFC. There is a
2230 * problem with TCP as specified in that the other end could 2230 * problem with TCP as specified in that the other end could
2231 * keep a socket open forever with no application left this end. 2231 * keep a socket open forever with no application left this end.
2232 * We use a 3 minute timeout (about the same as BSD) then kill 2232 * We use a 1 minute timeout (about the same as BSD) then kill
2233 * our end. If they send after that then tough - BUT: long enough 2233 * our end. If they send after that then tough - BUT: long enough
2234 * that we won't make the old 4*rto = almost no time - whoops 2234 * that we won't make the old 4*rto = almost no time - whoops
2235 * reset mistake. 2235 * reset mistake.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..227cba79fa6b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
671{ 671{
672 struct tcp_sock *tp = tcp_sk(sk); 672 struct tcp_sock *tp = tcp_sk(sk);
673 long m = mrtt; /* RTT */ 673 long m = mrtt; /* RTT */
674 u32 srtt = tp->srtt;
674 675
675 /* The following amusing code comes from Jacobson's 676 /* The following amusing code comes from Jacobson's
676 * article in SIGCOMM '88. Note that rtt and mdev 677 * article in SIGCOMM '88. Note that rtt and mdev
@@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
688 * does not matter how to _calculate_ it. Seems, it was trap 689 * does not matter how to _calculate_ it. Seems, it was trap
689 * that VJ failed to avoid. 8) 690 * that VJ failed to avoid. 8)
690 */ 691 */
691 if (m == 0) 692 if (srtt != 0) {
692 m = 1; 693 m -= (srtt >> 3); /* m is now error in rtt est */
693 if (tp->srtt != 0) { 694 srtt += m; /* rtt = 7/8 rtt + 1/8 new */
694 m -= (tp->srtt >> 3); /* m is now error in rtt est */
695 tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
696 if (m < 0) { 695 if (m < 0) {
697 m = -m; /* m is now abs(error) */ 696 m = -m; /* m is now abs(error) */
698 m -= (tp->mdev >> 2); /* similar update on mdev */ 697 m -= (tp->mdev >> 2); /* similar update on mdev */
@@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
723 } 722 }
724 } else { 723 } else {
725 /* no previous measure. */ 724 /* no previous measure. */
726 tp->srtt = m << 3; /* take the measured time to be rtt */ 725 srtt = m << 3; /* take the measured time to be rtt */
727 tp->mdev = m << 1; /* make sure rto = 3*rtt */ 726 tp->mdev = m << 1; /* make sure rto = 3*rtt */
728 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 727 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
729 tp->rtt_seq = tp->snd_nxt; 728 tp->rtt_seq = tp->snd_nxt;
730 } 729 }
730 tp->srtt = max(1U, srtt);
731} 731}
732 732
733/* Set the sk_pacing_rate to allow proper sizing of TSO packets. 733/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
746 746
747 rate *= max(tp->snd_cwnd, tp->packets_out); 747 rate *= max(tp->snd_cwnd, tp->packets_out);
748 748
749 /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), 749 /* Correction for small srtt and scheduling constraints.
750 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) 750 * For small rtt, consider noise is too high, and use
751 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
752 *
751 * We probably need usec resolution in the future. 753 * We probably need usec resolution in the future.
752 * Note: This also takes care of possible srtt=0 case, 754 * Note: This also takes care of possible srtt=0 case,
753 * when tcp_rtt_estimator() was not yet called. 755 * when tcp_rtt_estimator() was not yet called.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..3be16727f058 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
698 if ((1 << sk->sk_state) & 698 if ((1 << sk->sk_state) &
699 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | 699 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
700 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) 700 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
701 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); 701 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
702 0, GFP_ATOMIC);
702} 703}
703/* 704/*
704 * One tasklet per cpu tries to send more skbs. 705 * One tasklet per cpu tries to send more skbs.
@@ -1904,7 +1905,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1904 1905
1905 if (atomic_read(&sk->sk_wmem_alloc) > limit) { 1906 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
1906 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 1907 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
1907 break; 1908 /* It is possible TX completion already happened
1909 * before we set TSQ_THROTTLED, so we must
1910 * test again the condition.
1911 * We abuse smp_mb__after_clear_bit() because
1912 * there is no smp_mb__after_set_bit() yet
1913 */
1914 smp_mb__after_clear_bit();
1915 if (atomic_read(&sk->sk_wmem_alloc) > limit)
1916 break;
1908 } 1917 }
1909 1918
1910 limit = mss_now; 1919 limit = mss_now;
@@ -1977,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
1977 /* Schedule a loss probe in 2*RTT for SACK capable connections 1986 /* Schedule a loss probe in 2*RTT for SACK capable connections
1978 * in Open state, that are either limited by cwnd or application. 1987 * in Open state, that are either limited by cwnd or application.
1979 */ 1988 */
1980 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || 1989 if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
1981 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) 1990 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1982 return false; 1991 return false;
1983 1992
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
17static DEFINE_SPINLOCK(udp_offload_lock); 17static DEFINE_SPINLOCK(udp_offload_lock);
18static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; 18static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
19 19
20#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
21
20struct udp_offload_priv { 22struct udp_offload_priv {
21 struct udp_offload *offload; 23 struct udp_offload *offload;
22 struct rcu_head rcu; 24 struct rcu_head rcu;
@@ -100,8 +102,7 @@ out:
100 102
101int udp_add_offload(struct udp_offload *uo) 103int udp_add_offload(struct udp_offload *uo)
102{ 104{
103 struct udp_offload_priv __rcu **head = &udp_offload_base; 105 struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
104 struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
105 106
106 if (!new_offload) 107 if (!new_offload)
107 return -ENOMEM; 108 return -ENOMEM;
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
109 new_offload->offload = uo; 110 new_offload->offload = uo;
110 111
111 spin_lock(&udp_offload_lock); 112 spin_lock(&udp_offload_lock);
112 rcu_assign_pointer(new_offload->next, rcu_dereference(*head)); 113 new_offload->next = udp_offload_base;
113 rcu_assign_pointer(*head, new_offload); 114 rcu_assign_pointer(udp_offload_base, new_offload);
114 spin_unlock(&udp_offload_lock); 115 spin_unlock(&udp_offload_lock);
115 116
116 return 0; 117 return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
130 131
131 spin_lock(&udp_offload_lock); 132 spin_lock(&udp_offload_lock);
132 133
133 uo_priv = rcu_dereference(*head); 134 uo_priv = udp_deref_protected(*head);
134 for (; uo_priv != NULL; 135 for (; uo_priv != NULL;
135 uo_priv = rcu_dereference(*head)) { 136 uo_priv = udp_deref_protected(*head)) {
136
137 if (uo_priv->offload == uo) { 137 if (uo_priv->offload == uo) {
138 rcu_assign_pointer(*head, rcu_dereference(uo_priv->next)); 138 rcu_assign_pointer(*head,
139 udp_deref_protected(uo_priv->next));
139 goto unlock; 140 goto unlock;
140 } 141 }
141 head = &uo_priv->next; 142 head = &uo_priv->next;