diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/ah4.c | 2 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 7 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipcomp.c | 6 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 21 |
9 files changed, 73 insertions, 7 deletions
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 7ed3e4ae93ae..987b47dc69ad 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
393 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) | 393 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
394 | return; | 394 | return; |
395 | 395 | ||
396 | x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); | 396 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); |
397 | if (!x) | 397 | if (!x) |
398 | return; | 398 | return; |
399 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", | 399 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 014982b61564..51ca946e3392 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, | |||
1317 | { | 1317 | { |
1318 | int *valp = ctl->data; | 1318 | int *valp = ctl->data; |
1319 | int val = *valp; | 1319 | int val = *valp; |
1320 | loff_t pos = *ppos; | ||
1320 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); | 1321 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1321 | 1322 | ||
1322 | if (write && *valp != val) { | 1323 | if (write && *valp != val) { |
1323 | struct net *net = ctl->extra2; | 1324 | struct net *net = ctl->extra2; |
1324 | 1325 | ||
1325 | if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { | 1326 | if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { |
1326 | if (!rtnl_trylock()) | 1327 | if (!rtnl_trylock()) { |
1328 | /* Restore the original values before restarting */ | ||
1329 | *valp = val; | ||
1330 | *ppos = pos; | ||
1327 | return restart_syscall(); | 1331 | return restart_syscall(); |
1332 | } | ||
1328 | if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { | 1333 | if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { |
1329 | inet_forward_change(net); | 1334 | inet_forward_change(net); |
1330 | } else if (*valp) { | 1335 | } else if (*valp) { |
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1948895beb6d..14ca1f1c3fb0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) | |||
422 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) | 422 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
423 | return; | 423 | return; |
424 | 424 | ||
425 | x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); | 425 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); |
426 | if (!x) | 426 | if (!x) |
427 | return; | 427 | return; |
428 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", | 428 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a2a5983dbf03..c0c5274d0271 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
793 | } | 793 | } |
794 | 794 | ||
795 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { | 795 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { |
796 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); | 796 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
797 | ip_rt_put(rt); | 797 | ip_rt_put(rt); |
798 | goto tx_error; | 798 | goto tx_error; |
799 | } | 799 | } |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 83ed71500898..629067571f02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
36 | return; | 36 | return; |
37 | 37 | ||
38 | spi = htonl(ntohs(ipch->cpi)); | 38 | spi = htonl(ntohs(ipch->cpi)); |
39 | x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, | 39 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, |
40 | spi, IPPROTO_COMP, AF_INET); | 40 | spi, IPPROTO_COMP, AF_INET); |
41 | if (!x) | 41 | if (!x) |
42 | return; | 42 | return; |
@@ -63,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | |||
63 | t->props.mode = x->props.mode; | 63 | t->props.mode = x->props.mode; |
64 | t->props.saddr.a4 = x->props.saddr.a4; | 64 | t->props.saddr.a4 = x->props.saddr.a4; |
65 | t->props.flags = x->props.flags; | 65 | t->props.flags = x->props.flags; |
66 | memcpy(&t->mark, &x->mark, sizeof(t->mark)); | ||
66 | 67 | ||
67 | if (xfrm_init_state(t)) | 68 | if (xfrm_init_state(t)) |
68 | goto error; | 69 | goto error; |
@@ -87,8 +88,9 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) | |||
87 | struct net *net = xs_net(x); | 88 | struct net *net = xs_net(x); |
88 | int err = 0; | 89 | int err = 0; |
89 | struct xfrm_state *t; | 90 | struct xfrm_state *t; |
91 | u32 mark = x->mark.v & x->mark.m; | ||
90 | 92 | ||
91 | t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr.a4, | 93 | t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, |
92 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); | 94 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); |
93 | if (!t) { | 95 | if (!t) { |
94 | t = ipcomp_tunnel_create(x); | 96 | t = ipcomp_tunnel_create(x); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce3994..c1bc074f61b7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -576,6 +576,20 @@ static struct ctl_table ipv4_table[] = { | |||
576 | .proc_handler = proc_dointvec | 576 | .proc_handler = proc_dointvec |
577 | }, | 577 | }, |
578 | { | 578 | { |
579 | .procname = "tcp_thin_linear_timeouts", | ||
580 | .data = &sysctl_tcp_thin_linear_timeouts, | ||
581 | .maxlen = sizeof(int), | ||
582 | .mode = 0644, | ||
583 | .proc_handler = proc_dointvec | ||
584 | }, | ||
585 | { | ||
586 | .procname = "tcp_thin_dupack", | ||
587 | .data = &sysctl_tcp_thin_dupack, | ||
588 | .maxlen = sizeof(int), | ||
589 | .mode = 0644, | ||
590 | .proc_handler = proc_dointvec | ||
591 | }, | ||
592 | { | ||
579 | .procname = "udp_mem", | 593 | .procname = "udp_mem", |
580 | .data = &sysctl_udp_mem, | 594 | .data = &sysctl_udp_mem, |
581 | .maxlen = sizeof(sysctl_udp_mem), | 595 | .maxlen = sizeof(sysctl_udp_mem), |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e471d037fcc9..5901010fad55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2229,6 +2229,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2229 | } | 2229 | } |
2230 | break; | 2230 | break; |
2231 | 2231 | ||
2232 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
2233 | if (val < 0 || val > 1) | ||
2234 | err = -EINVAL; | ||
2235 | else | ||
2236 | tp->thin_lto = val; | ||
2237 | break; | ||
2238 | |||
2239 | case TCP_THIN_DUPACK: | ||
2240 | if (val < 0 || val > 1) | ||
2241 | err = -EINVAL; | ||
2242 | else | ||
2243 | tp->thin_dupack = val; | ||
2244 | break; | ||
2245 | |||
2232 | case TCP_CORK: | 2246 | case TCP_CORK: |
2233 | /* When set indicates to always queue non-full frames. | 2247 | /* When set indicates to always queue non-full frames. |
2234 | * Later the user clears this option and we transmit | 2248 | * Later the user clears this option and we transmit |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3fddc69ccccc..788851ca8c5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2; | |||
89 | int sysctl_tcp_frto_response __read_mostly; | 89 | int sysctl_tcp_frto_response __read_mostly; |
90 | int sysctl_tcp_nometrics_save __read_mostly; | 90 | int sysctl_tcp_nometrics_save __read_mostly; |
91 | 91 | ||
92 | int sysctl_tcp_thin_dupack __read_mostly; | ||
93 | |||
92 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 94 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
93 | int sysctl_tcp_abc __read_mostly; | 95 | int sysctl_tcp_abc __read_mostly; |
94 | 96 | ||
@@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2447 | return 1; | 2449 | return 1; |
2448 | } | 2450 | } |
2449 | 2451 | ||
2452 | /* If a thin stream is detected, retransmit after first | ||
2453 | * received dupack. Employ only if SACK is supported in order | ||
2454 | * to avoid possible corner-case series of spurious retransmissions | ||
2455 | * Use only if there are no unsent data. | ||
2456 | */ | ||
2457 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | ||
2458 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | ||
2459 | tcp_is_sack(tp) && !tcp_send_head(sk)) | ||
2460 | return 1; | ||
2461 | |||
2450 | return 0; | 2462 | return 0; |
2451 | } | 2463 | } |
2452 | 2464 | ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index de7d1bf9114f..a17629b8912e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; | |||
29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; | 29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | 30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
31 | int sysctl_tcp_orphan_retries __read_mostly; | 31 | int sysctl_tcp_orphan_retries __read_mostly; |
32 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | ||
32 | 33 | ||
33 | static void tcp_write_timer(unsigned long); | 34 | static void tcp_write_timer(unsigned long); |
34 | static void tcp_delack_timer(unsigned long); | 35 | static void tcp_delack_timer(unsigned long); |
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) | |||
415 | icsk->icsk_retransmits++; | 416 | icsk->icsk_retransmits++; |
416 | 417 | ||
417 | out_reset_timer: | 418 | out_reset_timer: |
418 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 419 | /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is |
420 | * used to reset timer, set to 0. Recalculate 'icsk_rto' as this | ||
421 | * might be increased if the stream oscillates between thin and thick, | ||
422 | * thus the old value might already be too high compared to the value | ||
423 | * set by 'tcp_set_rto' in tcp_input.c which resets the rto without | ||
424 | * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating | ||
425 | * exponential backoff behaviour to avoid continue hammering | ||
426 | * linear-timeout retransmissions into a black hole | ||
427 | */ | ||
428 | if (sk->sk_state == TCP_ESTABLISHED && | ||
429 | (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && | ||
430 | tcp_stream_is_thin(tp) && | ||
431 | icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { | ||
432 | icsk->icsk_backoff = 0; | ||
433 | icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); | ||
434 | } else { | ||
435 | /* Use normal (exponential) backoff */ | ||
436 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | ||
437 | } | ||
419 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 438 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
420 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | 439 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) |
421 | __sk_dst_reset(sk); | 440 | __sk_dst_reset(sk); |