aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/ip_tunnel.c29
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c5
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c75
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/udp_offload.c17
10 files changed, 130 insertions, 40 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
1443 + nla_total_size(4) /* IFA_LOCAL */ 1443 + nla_total_size(4) /* IFA_LOCAL */
1444 + nla_total_size(4) /* IFA_BROADCAST */ 1444 + nla_total_size(4) /* IFA_BROADCAST */
1445 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ 1445 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1446 + nla_total_size(4); /* IFA_FLAGS */ 1446 + nla_total_size(4) /* IFA_FLAGS */
1447 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1447} 1448}
1448 1449
1449static inline u32 cstamp_delta(unsigned long cstamp) 1450static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..50228be5c17b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -101,28 +101,22 @@ static void tunnel_dst_reset_all(struct ip_tunnel *t)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); 101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102} 102}
103 103
104static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) 104static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
105{ 105{
106 struct dst_entry *dst; 106 struct dst_entry *dst;
107 107
108 rcu_read_lock(); 108 rcu_read_lock();
109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); 109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
110 if (dst) 110 if (dst) {
111 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
112 rcu_read_unlock();
113 tunnel_dst_reset(t);
114 return NULL;
115 }
111 dst_hold(dst); 116 dst_hold(dst);
112 rcu_read_unlock();
113 return dst;
114}
115
116static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
117{
118 struct dst_entry *dst = tunnel_dst_get(t);
119
120 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
121 tunnel_dst_reset(t);
122 return NULL;
123 } 117 }
124 118 rcu_read_unlock();
125 return dst; 119 return (struct rtable *)dst;
126} 120}
127 121
128/* Often modified stats are per cpu, other are shared (netdev->stats) */ 122/* Often modified stats are per cpu, other are shared (netdev->stats) */
@@ -584,7 +578,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
584 struct flowi4 fl4; 578 struct flowi4 fl4;
585 u8 tos, ttl; 579 u8 tos, ttl;
586 __be16 df; 580 __be16 df;
587 struct rtable *rt = NULL; /* Route to the other host */ 581 struct rtable *rt; /* Route to the other host */
588 unsigned int max_headroom; /* The extra header space needed */ 582 unsigned int max_headroom; /* The extra header space needed */
589 __be32 dst; 583 __be32 dst;
590 int err; 584 int err;
@@ -657,8 +651,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
657 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 651 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
658 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 652 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
659 653
660 if (connected) 654 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
661 rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
662 655
663 if (!rt) { 656 if (!rt) {
664 rt = ip_route_output_key(tunnel->net, &fl4); 657 rt = ip_route_output_key(tunnel->net, &fl4);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
61 packet transformations such as the source, destination address and 61 packet transformations such as the source, destination address and
62 source and destination ports. 62 source and destination ports.
63 63
64config NFT_REJECT_IPV4
65 depends on NF_TABLES_IPV4
66 default NFT_REJECT
67 tristate
68
64config NF_TABLES_ARP 69config NF_TABLES_ARP
65 depends on NF_TABLES 70 depends on NF_TABLES
66 tristate "ARP nf_tables support" 71 tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
30obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o 30obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
31obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o 31obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
32obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o 32obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
33obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
33obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o 34obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
34 35
35# generic IP tables 36# generic IP tables
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
229 ret = nf_ct_expect_related(rtcp_exp); 229 ret = nf_ct_expect_related(rtcp_exp);
230 if (ret == 0) 230 if (ret == 0)
231 break; 231 break;
232 else if (ret != -EBUSY) { 232 else if (ret == -EBUSY) {
233 nf_ct_unexpect_related(rtp_exp);
234 continue;
235 } else if (ret < 0) {
233 nf_ct_unexpect_related(rtp_exp); 236 nf_ct_unexpect_related(rtp_exp);
234 nated_port = 0; 237 nated_port = 0;
235 break; 238 break;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e79718a382f2
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2013 Eric Leblond <eric@regit.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
10 */
11
12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables.h>
19#include <net/icmp.h>
20#include <net/netfilter/ipv4/nf_reject.h>
21#include <net/netfilter/nft_reject.h>
22
23void nft_reject_ipv4_eval(const struct nft_expr *expr,
24 struct nft_data data[NFT_REG_MAX + 1],
25 const struct nft_pktinfo *pkt)
26{
27 struct nft_reject *priv = nft_expr_priv(expr);
28
29 switch (priv->type) {
30 case NFT_REJECT_ICMP_UNREACH:
31 nf_send_unreach(pkt->skb, priv->icmp_code);
32 break;
33 case NFT_REJECT_TCP_RST:
34 nf_send_reset(pkt->skb, pkt->ops->hooknum);
35 break;
36 }
37
38 data[NFT_REG_VERDICT].verdict = NF_DROP;
39}
40EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
41
42static struct nft_expr_type nft_reject_ipv4_type;
43static const struct nft_expr_ops nft_reject_ipv4_ops = {
44 .type = &nft_reject_ipv4_type,
45 .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
46 .eval = nft_reject_ipv4_eval,
47 .init = nft_reject_init,
48 .dump = nft_reject_dump,
49};
50
51static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
52 .family = NFPROTO_IPV4,
53 .name = "reject",
54 .ops = &nft_reject_ipv4_ops,
55 .policy = nft_reject_policy,
56 .maxattr = NFTA_REJECT_MAX,
57 .owner = THIS_MODULE,
58};
59
60static int __init nft_reject_ipv4_module_init(void)
61{
62 return nft_register_expr(&nft_reject_ipv4_type);
63}
64
65static void __exit nft_reject_ipv4_module_exit(void)
66{
67 nft_unregister_expr(&nft_reject_ipv4_type);
68}
69
70module_init(nft_reject_ipv4_module_init);
71module_exit(nft_reject_ipv4_module_exit);
72
73MODULE_LICENSE("GPL");
74MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
75MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..9f3a2db9109e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,7 +2229,7 @@ adjudge_to_death:
2229 /* This is a (useful) BSD violating of the RFC. There is a 2229 /* This is a (useful) BSD violating of the RFC. There is a
2230 * problem with TCP as specified in that the other end could 2230 * problem with TCP as specified in that the other end could
2231 * keep a socket open forever with no application left this end. 2231 * keep a socket open forever with no application left this end.
2232 * We use a 3 minute timeout (about the same as BSD) then kill 2232 * We use a 1 minute timeout (about the same as BSD) then kill
2233 * our end. If they send after that then tough - BUT: long enough 2233 * our end. If they send after that then tough - BUT: long enough
2234 * that we won't make the old 4*rto = almost no time - whoops 2234 * that we won't make the old 4*rto = almost no time - whoops
2235 * reset mistake. 2235 * reset mistake.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..227cba79fa6b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
671{ 671{
672 struct tcp_sock *tp = tcp_sk(sk); 672 struct tcp_sock *tp = tcp_sk(sk);
673 long m = mrtt; /* RTT */ 673 long m = mrtt; /* RTT */
674 u32 srtt = tp->srtt;
674 675
675 /* The following amusing code comes from Jacobson's 676 /* The following amusing code comes from Jacobson's
676 * article in SIGCOMM '88. Note that rtt and mdev 677 * article in SIGCOMM '88. Note that rtt and mdev
@@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
688 * does not matter how to _calculate_ it. Seems, it was trap 689 * does not matter how to _calculate_ it. Seems, it was trap
689 * that VJ failed to avoid. 8) 690 * that VJ failed to avoid. 8)
690 */ 691 */
691 if (m == 0) 692 if (srtt != 0) {
692 m = 1; 693 m -= (srtt >> 3); /* m is now error in rtt est */
693 if (tp->srtt != 0) { 694 srtt += m; /* rtt = 7/8 rtt + 1/8 new */
694 m -= (tp->srtt >> 3); /* m is now error in rtt est */
695 tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
696 if (m < 0) { 695 if (m < 0) {
697 m = -m; /* m is now abs(error) */ 696 m = -m; /* m is now abs(error) */
698 m -= (tp->mdev >> 2); /* similar update on mdev */ 697 m -= (tp->mdev >> 2); /* similar update on mdev */
@@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
723 } 722 }
724 } else { 723 } else {
725 /* no previous measure. */ 724 /* no previous measure. */
726 tp->srtt = m << 3; /* take the measured time to be rtt */ 725 srtt = m << 3; /* take the measured time to be rtt */
727 tp->mdev = m << 1; /* make sure rto = 3*rtt */ 726 tp->mdev = m << 1; /* make sure rto = 3*rtt */
728 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 727 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
729 tp->rtt_seq = tp->snd_nxt; 728 tp->rtt_seq = tp->snd_nxt;
730 } 729 }
730 tp->srtt = max(1U, srtt);
731} 731}
732 732
733/* Set the sk_pacing_rate to allow proper sizing of TSO packets. 733/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
746 746
747 rate *= max(tp->snd_cwnd, tp->packets_out); 747 rate *= max(tp->snd_cwnd, tp->packets_out);
748 748
749 /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), 749 /* Correction for small srtt and scheduling constraints.
750 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) 750 * For small rtt, consider noise is too high, and use
751 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
752 *
751 * We probably need usec resolution in the future. 753 * We probably need usec resolution in the future.
752 * Note: This also takes care of possible srtt=0 case, 754 * Note: This also takes care of possible srtt=0 case,
753 * when tcp_rtt_estimator() was not yet called. 755 * when tcp_rtt_estimator() was not yet called.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..3be16727f058 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
698 if ((1 << sk->sk_state) & 698 if ((1 << sk->sk_state) &
699 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | 699 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
700 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) 700 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
701 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); 701 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
702 0, GFP_ATOMIC);
702} 703}
703/* 704/*
704 * One tasklet per cpu tries to send more skbs. 705 * One tasklet per cpu tries to send more skbs.
@@ -1904,7 +1905,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1904 1905
1905 if (atomic_read(&sk->sk_wmem_alloc) > limit) { 1906 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
1906 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 1907 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
1907 break; 1908 /* It is possible TX completion already happened
1909 * before we set TSQ_THROTTLED, so we must
1910 * test again the condition.
1911 * We abuse smp_mb__after_clear_bit() because
1912 * there is no smp_mb__after_set_bit() yet
1913 */
1914 smp_mb__after_clear_bit();
1915 if (atomic_read(&sk->sk_wmem_alloc) > limit)
1916 break;
1908 } 1917 }
1909 1918
1910 limit = mss_now; 1919 limit = mss_now;
@@ -1977,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
1977 /* Schedule a loss probe in 2*RTT for SACK capable connections 1986 /* Schedule a loss probe in 2*RTT for SACK capable connections
1978 * in Open state, that are either limited by cwnd or application. 1987 * in Open state, that are either limited by cwnd or application.
1979 */ 1988 */
1980 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || 1989 if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
1981 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) 1990 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1982 return false; 1991 return false;
1983 1992
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
17static DEFINE_SPINLOCK(udp_offload_lock); 17static DEFINE_SPINLOCK(udp_offload_lock);
18static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; 18static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
19 19
20#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
21
20struct udp_offload_priv { 22struct udp_offload_priv {
21 struct udp_offload *offload; 23 struct udp_offload *offload;
22 struct rcu_head rcu; 24 struct rcu_head rcu;
@@ -100,8 +102,7 @@ out:
100 102
101int udp_add_offload(struct udp_offload *uo) 103int udp_add_offload(struct udp_offload *uo)
102{ 104{
103 struct udp_offload_priv __rcu **head = &udp_offload_base; 105 struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
104 struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
105 106
106 if (!new_offload) 107 if (!new_offload)
107 return -ENOMEM; 108 return -ENOMEM;
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
109 new_offload->offload = uo; 110 new_offload->offload = uo;
110 111
111 spin_lock(&udp_offload_lock); 112 spin_lock(&udp_offload_lock);
112 rcu_assign_pointer(new_offload->next, rcu_dereference(*head)); 113 new_offload->next = udp_offload_base;
113 rcu_assign_pointer(*head, new_offload); 114 rcu_assign_pointer(udp_offload_base, new_offload);
114 spin_unlock(&udp_offload_lock); 115 spin_unlock(&udp_offload_lock);
115 116
116 return 0; 117 return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
130 131
131 spin_lock(&udp_offload_lock); 132 spin_lock(&udp_offload_lock);
132 133
133 uo_priv = rcu_dereference(*head); 134 uo_priv = udp_deref_protected(*head);
134 for (; uo_priv != NULL; 135 for (; uo_priv != NULL;
135 uo_priv = rcu_dereference(*head)) { 136 uo_priv = udp_deref_protected(*head)) {
136
137 if (uo_priv->offload == uo) { 137 if (uo_priv->offload == uo) {
138 rcu_assign_pointer(*head, rcu_dereference(uo_priv->next)); 138 rcu_assign_pointer(*head,
139 udp_deref_protected(uo_priv->next));
139 goto unlock; 140 goto unlock;
140 } 141 }
141 head = &uo_priv->next; 142 head = &uo_priv->next;