aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
authorJon Maxwell <jmaxwell37@gmail.com>2018-05-10 02:53:51 -0400
committerDavid S. Miller <davem@davemloft.net>2018-05-10 17:44:52 -0400
commit00483690552c5fb6aa30bf3acb75b0ee89b4c0fd (patch)
tree6946a9497933df7fe6ae0f54729ee84b04e1ca1e /net/ipv4/tcp_ipv4.c
parent03bdfc001c951cb04ad3d28aecee4ec0e18e9664 (diff)
tcp: Add mark for TIMEWAIT sockets
This version has some suggestions by Eric Dumazet: - Use a local variable for the mark in IPv6 instead of ctl_sk to avoid SMP races. - Use the more elegant "IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark" statement. - Factorize code as sk_fullsock() check is not necessary. Aidan McGurn from Openwave Mobility systems reported the following bug: "Marked routing is broken on customer deployment. Its effects are large increase in Uplink retransmissions caused by the client never receiving the final ACK to their FINACK - this ACK misses the mark and routes out of the incorrect route." Currently marks are added to sk_buffs for replies when the "fwmark_reflect" sysctl is enabled. But not for TW sockets that had sk->sk_mark set via setsockopt(SO_MARK..). Fix this in IPv4/v6 by adding tw->tw_mark for TIME_WAIT sockets. Copy the original sk->sk_mark in __inet_twsk_hashdance() to the new tw->tw_mark location. Then propagate this so that the skb gets sent with the correct mark. Do the same for resets. Give the "fwmark_reflect" sysctl precedence over sk->sk_mark so that netfilter rules are still honored. Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c16
1 files changed, 14 insertions, 2 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f70586b50838..caf23de88f8a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -621,6 +621,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
621 struct sock *sk1 = NULL; 621 struct sock *sk1 = NULL;
622#endif 622#endif
623 struct net *net; 623 struct net *net;
624 struct sock *ctl_sk;
624 625
625 /* Never send a reset in response to a reset. */ 626 /* Never send a reset in response to a reset. */
626 if (th->rst) 627 if (th->rst)
@@ -723,11 +724,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
723 arg.tos = ip_hdr(skb)->tos; 724 arg.tos = ip_hdr(skb)->tos;
724 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 725 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
725 local_bh_disable(); 726 local_bh_disable();
726 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 727 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
728 if (sk)
729 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
730 inet_twsk(sk)->tw_mark : sk->sk_mark;
731 ip_send_unicast_reply(ctl_sk,
727 skb, &TCP_SKB_CB(skb)->header.h4.opt, 732 skb, &TCP_SKB_CB(skb)->header.h4.opt,
728 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 733 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
729 &arg, arg.iov[0].iov_len); 734 &arg, arg.iov[0].iov_len);
730 735
736 ctl_sk->sk_mark = 0;
731 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 737 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
732 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 738 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
733 local_bh_enable(); 739 local_bh_enable();
@@ -759,6 +765,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
759 } rep; 765 } rep;
760 struct net *net = sock_net(sk); 766 struct net *net = sock_net(sk);
761 struct ip_reply_arg arg; 767 struct ip_reply_arg arg;
768 struct sock *ctl_sk;
762 769
763 memset(&rep.th, 0, sizeof(struct tcphdr)); 770 memset(&rep.th, 0, sizeof(struct tcphdr));
764 memset(&arg, 0, sizeof(arg)); 771 memset(&arg, 0, sizeof(arg));
@@ -809,11 +816,16 @@ static void tcp_v4_send_ack(const struct sock *sk,
809 arg.tos = tos; 816 arg.tos = tos;
810 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); 817 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
811 local_bh_disable(); 818 local_bh_disable();
812 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 819 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
820 if (sk)
821 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
822 inet_twsk(sk)->tw_mark : sk->sk_mark;
823 ip_send_unicast_reply(ctl_sk,
813 skb, &TCP_SKB_CB(skb)->header.h4.opt, 824 skb, &TCP_SKB_CB(skb)->header.h4.opt,
814 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 825 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
815 &arg, arg.iov[0].iov_len); 826 &arg, arg.iov[0].iov_len);
816 827
828 ctl_sk->sk_mark = 0;
817 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 829 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
818 local_bh_enable(); 830 local_bh_enable();
819} 831}