about summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/ah4.c 2
-rw-r--r-- net/ipv4/devinet.c 7
-rw-r--r-- net/ipv4/esp4.c 2
-rw-r--r-- net/ipv4/ip_gre.c 2
-rw-r--r-- net/ipv4/ipcomp.c 6
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 14
-rw-r--r-- net/ipv4/tcp.c 14
-rw-r--r-- net/ipv4/tcp_input.c 12
-rw-r--r-- net/ipv4/tcp_timer.c 21
9 files changed, 73 insertions, 7 deletions
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7ed3e4ae93ae..987b47dc69ad 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
393 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 393 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
394 return; 394 return;
395 395
396 x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); 396 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
397 if (!x) 397 if (!x)
398 return; 398 return;
399 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", 399 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 014982b61564..51ca946e3392 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
1317{ 1317{
1318 int *valp = ctl->data; 1318 int *valp = ctl->data;
1319 int val = *valp; 1319 int val = *valp;
1320 loff_t pos = *ppos;
1320 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1321 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1321 1322
1322 if (write && *valp != val) { 1323 if (write && *valp != val) {
1323 struct net *net = ctl->extra2; 1324 struct net *net = ctl->extra2;
1324 1325
1325 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 1326 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1326 if (!rtnl_trylock()) 1327 if (!rtnl_trylock()) {
1328 /* Restore the original values before restarting */
1329 *valp = val;
1330 *ppos = pos;
1327 return restart_syscall(); 1331 return restart_syscall();
1332 }
1328 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1333 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1329 inet_forward_change(net); 1334 inet_forward_change(net);
1330 } else if (*valp) { 1335 } else if (*valp) {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1948895beb6d..14ca1f1c3fb0 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info)
422 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 422 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
423 return; 423 return;
424 424
425 x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 425 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
426 if (!x) 426 if (!x)
427 return; 427 return;
428 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 428 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a2a5983dbf03..c0c5274d0271 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
793 } 793 }
794 794
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
797 ip_rt_put(rt); 797 ip_rt_put(rt);
798 goto tx_error; 798 goto tx_error;
799 } 799 }
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 83ed71500898..629067571f02 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
36 return; 36 return;
37 37
38 spi = htonl(ntohs(ipch->cpi)); 38 spi = htonl(ntohs(ipch->cpi));
39 x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, 39 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr,
40 spi, IPPROTO_COMP, AF_INET); 40 spi, IPPROTO_COMP, AF_INET);
41 if (!x) 41 if (!x)
42 return; 42 return;
@@ -63,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
63 t->props.mode = x->props.mode; 63 t->props.mode = x->props.mode;
64 t->props.saddr.a4 = x->props.saddr.a4; 64 t->props.saddr.a4 = x->props.saddr.a4;
65 t->props.flags = x->props.flags; 65 t->props.flags = x->props.flags;
66 memcpy(&t->mark, &x->mark, sizeof(t->mark));
66 67
67 if (xfrm_init_state(t)) 68 if (xfrm_init_state(t))
68 goto error; 69 goto error;
@@ -87,8 +88,9 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x)
87 struct net *net = xs_net(x); 88 struct net *net = xs_net(x);
88 int err = 0; 89 int err = 0;
89 struct xfrm_state *t; 90 struct xfrm_state *t;
91 u32 mark = x->mark.v & x->mark.m;
90 92
91 t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr.a4, 93 t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4,
92 x->props.saddr.a4, IPPROTO_IPIP, AF_INET); 94 x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
93 if (!t) { 95 if (!t) {
94 t = ipcomp_tunnel_create(x); 96 t = ipcomp_tunnel_create(x);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712ce3994..c1bc074f61b7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -576,6 +576,20 @@ static struct ctl_table ipv4_table[] = {
576 .proc_handler = proc_dointvec 576 .proc_handler = proc_dointvec
577 }, 577 },
578 { 578 {
579 .procname = "tcp_thin_linear_timeouts",
580 .data = &sysctl_tcp_thin_linear_timeouts,
581 .maxlen = sizeof(int),
582 .mode = 0644,
583 .proc_handler = proc_dointvec
584 },
585 {
586 .procname = "tcp_thin_dupack",
587 .data = &sysctl_tcp_thin_dupack,
588 .maxlen = sizeof(int),
589 .mode = 0644,
590 .proc_handler = proc_dointvec
591 },
592 {
579 .procname = "udp_mem", 593 .procname = "udp_mem",
580 .data = &sysctl_udp_mem, 594 .data = &sysctl_udp_mem,
581 .maxlen = sizeof(sysctl_udp_mem), 595 .maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e471d037fcc9..5901010fad55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,6 +2229,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2229 } 2229 }
2230 break; 2230 break;
2231 2231
2232 case TCP_THIN_LINEAR_TIMEOUTS:
2233 if (val < 0 || val > 1)
2234 err = -EINVAL;
2235 else
2236 tp->thin_lto = val;
2237 break;
2238
2239 case TCP_THIN_DUPACK:
2240 if (val < 0 || val > 1)
2241 err = -EINVAL;
2242 else
2243 tp->thin_dupack = val;
2244 break;
2245
2232 case TCP_CORK: 2246 case TCP_CORK:
2233 /* When set indicates to always queue non-full frames. 2247 /* When set indicates to always queue non-full frames.
2234 * Later the user clears this option and we transmit 2248 * Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3fddc69ccccc..788851ca8c5d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
89int sysctl_tcp_frto_response __read_mostly; 89int sysctl_tcp_frto_response __read_mostly;
90int sysctl_tcp_nometrics_save __read_mostly; 90int sysctl_tcp_nometrics_save __read_mostly;
91 91
92int sysctl_tcp_thin_dupack __read_mostly;
93
92int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 94int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
93int sysctl_tcp_abc __read_mostly; 95int sysctl_tcp_abc __read_mostly;
94 96
@@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk)
2447 return 1; 2449 return 1;
2448 } 2450 }
2449 2451
2452 /* If a thin stream is detected, retransmit after first
2453 * received dupack. Employ only if SACK is supported in order
2454 * to avoid possible corner-case series of spurious retransmissions
2455 * Use only if there are no unsent data.
2456 */
2457 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2458 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2459 tcp_is_sack(tp) && !tcp_send_head(sk))
2460 return 1;
2461
2450 return 0; 2462 return 0;
2451} 2463}
2452 2464
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index de7d1bf9114f..a17629b8912e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
29int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; 29int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
30int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; 30int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
31int sysctl_tcp_orphan_retries __read_mostly; 31int sysctl_tcp_orphan_retries __read_mostly;
32int sysctl_tcp_thin_linear_timeouts __read_mostly;
32 33
33static void tcp_write_timer(unsigned long); 34static void tcp_write_timer(unsigned long);
34static void tcp_delack_timer(unsigned long); 35static void tcp_delack_timer(unsigned long);
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk)
415 icsk->icsk_retransmits++; 416 icsk->icsk_retransmits++;
416 417
417out_reset_timer: 418out_reset_timer:
418 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 419 /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
420 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
421 * might be increased if the stream oscillates between thin and thick,
422 * thus the old value might already be too high compared to the value
423 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
424 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
425 * exponential backoff behaviour to avoid continue hammering
426 * linear-timeout retransmissions into a black hole
427 */
428 if (sk->sk_state == TCP_ESTABLISHED &&
429 (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
430 tcp_stream_is_thin(tp) &&
431 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
432 icsk->icsk_backoff = 0;
433 icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
434 } else {
435 /* Use normal (exponential) backoff */
436 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
437 }
419 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
420 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) 439 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
421 __sk_dst_reset(sk); 440 __sk_dst_reset(sk);