author		Jon Maxwell <jmaxwell37@gmail.com>	2018-05-10 02:53:51 -0400
committer	David S. Miller <davem@davemloft.net>	2018-05-10 17:44:52 -0400
commit		00483690552c5fb6aa30bf3acb75b0ee89b4c0fd (patch)
tree		6946a9497933df7fe6ae0f54729ee84b04e1ca1e /net/ipv4/tcp_ipv4.c
parent		03bdfc001c951cb04ad3d28aecee4ec0e18e9664 (diff)
tcp: Add mark for TIMEWAIT sockets
This version incorporates several suggestions from Eric Dumazet:
- Use a local variable for the mark in IPv6 instead of ctl_sk to avoid SMP
races.
- Use the more elegant "IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark"
statement (see the sketch after this list).
- Factorize code as sk_fullsock() check is not necessary.
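As a reading aid for the second suggestion, the "?:" expression resolves the
reply mark roughly as sketched below. IP4_REPLY_MARK() is the existing helper
behind the "fwmark_reflect" sysctl; its body is paraphrased here, not quoted:

/* Sketch only: IP4_REPLY_MARK(net, m) yields m when
 * net->ipv4.sysctl_fwmark_reflect is enabled and 0 otherwise, so the
 * GNU "?:" operator falls back to the socket's own SO_MARK only when
 * there is no reflected mark to honor.
 */
u32 mark = IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark;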
Aidan McGurn from Openwave Mobility systems reported the following bug:
"Marked routing is broken on customer deployment. Its effects are large
increase in Uplink retransmissions caused by the client never receiving
the final ACK to their FINACK - this ACK misses the mark and routes out
of the incorrect route."
Currently, marks are added to sk_buffs for replies when the "fwmark_reflect"
sysctl is enabled, but not for TW sockets that had sk->sk_mark set via
setsockopt(SO_MARK..).
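(For context, an application typically tags its socket from user space along
these lines; the helper name and mark value are purely illustrative.)

#include <stdio.h>
#include <sys/socket.h>

/* Illustrative only: tag an already-open socket so policy routing and
 * netfilter rules can match its traffic; requires CAP_NET_ADMIN.
 */
static void set_socket_mark(int fd, unsigned int mark)
{
	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
		perror("setsockopt(SO_MARK)");
}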
Fix this in IPv4/v6 by adding tw->tw_mark for TIME_WAIT sockets. Copy the
original sk->sk_mark in __inet_twsk_hashdance() to the new tw->tw_mark location.
Then propagate this so that the skb gets sent with the correct mark. Do the same
for resets. Give the "fwmark_reflect" sysctl precedence over sk->sk_mark so that
netfilter rules are still honored.
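The timewait-side pieces live outside net/ipv4/tcp_ipv4.c and therefore do not
appear in the diff below; going by the description above, they amount to a new
field plus a one-line copy, roughly:

/* Sketch, not the verbatim hunks:
 * include/net/inet_timewait_sock.h -- new member on struct inet_timewait_sock:
 */
	__u32			tw_mark;

/* net/ipv4/inet_timewait_sock.c -- in __inet_twsk_hashdance(), before the
 * full socket is retired, preserve its SO_MARK for later replies:
 */
	tw->tw_mark = sk->sk_mark;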
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	16
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f70586b50838..caf23de88f8a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -621,6 +621,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	struct sock *sk1 = NULL;
 #endif
 	struct net *net;
+	struct sock *ctl_sk;
 
 	/* Never send a reset in response to a reset. */
 	if (th->rst)
@@ -723,11 +724,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	arg.tos = ip_hdr(skb)->tos;
 	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
+	if (sk)
+		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	ip_send_unicast_reply(ctl_sk,
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 			      &arg, arg.iov[0].iov_len);
 
+	ctl_sk->sk_mark = 0;
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 	local_bh_enable();
@@ -759,6 +765,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	} rep;
 	struct net *net = sock_net(sk);
 	struct ip_reply_arg arg;
+	struct sock *ctl_sk;
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -809,11 +816,16 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	arg.tos = tos;
 	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
+	if (sk)
+		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	ip_send_unicast_reply(ctl_sk,
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 			      &arg, arg.iov[0].iov_len);
 
+	ctl_sk->sk_mark = 0;
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 	local_bh_enable();
 }
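For completeness, the IPv6 reply path (net/ipv6/tcp_ipv6.c, outside this
diffstat view) follows the first suggestion above and computes the mark into a
local variable instead of writing to the shared control socket; a rough sketch
under that assumption:

/* Sketch of the tcp_v6_send_response() counterpart, reconstructed from the
 * changelog rather than quoted: the mark is kept in a local variable and the
 * reflected mark, when present, still takes precedence.
 */
u32 mark = 0;

if (sk)
	mark = (sk->sk_state == TCP_TIME_WAIT) ?
		inet_twsk(sk)->tw_mark : sk->sk_mark;
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;

On the IPv4 side shown above, ctl_sk is the per-netns, per-cpu control socket,
so its sk_mark is set only for the duration of this reply (bottom halves are
disabled) and cleared again afterwards so later, unrelated resets and ACKs do
not inherit a stale mark.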