aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-10-24 03:06:21 -0400
committerDavid S. Miller <davem@davemloft.net>2011-10-24 03:06:21 -0400
commit66b13d99d96a1a69f47a6bc3dc47f45955967377 (patch)
treec566bee026b5e9703f789d27657aefa7c70c587c
parent318cf7aaa0a6d20ecf6be33eb771291e5ff2e3b9 (diff)
ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
There is a long standing bug in linux tcp stack, about ACK messages sent on behalf of TIME_WAIT sockets. In the IP header of the ACK message, we choose to reflect TOS field of incoming message, and this might break some setups. Example of things that were broken : - Routing using TOS as a selector - Firewalls - Trafic classification / shaping We now remember in timewait structure the inet tos field and use it in ACK generation, and route lookup. Notes : - We still reflect incoming TOS in RST messages. - We could extend MuraliRaja Muniraju patch to report TOS value in netlink messages for TIME_WAIT sockets. - A patch is needed for IPv6 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_timewait_sock.h3
-rw-r--r--include/net/ip.h3
-rw-r--r--net/ipv4/inet_timewait_sock.c1
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/tcp_ipv4.c11
5 files changed, 15 insertions, 9 deletions
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index f1a770977c4f..180231c5bbbe 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -126,7 +126,8 @@ struct inet_timewait_sock {
126 /* And these are ours. */ 126 /* And these are ours. */
127 unsigned int tw_ipv6only : 1, 127 unsigned int tw_ipv6only : 1,
128 tw_transparent : 1, 128 tw_transparent : 1,
129 tw_pad : 14, /* 14 bits hole */ 129 tw_pad : 6, /* 6 bits hole */
130 tw_tos : 8,
130 tw_ipv6_offset : 16; 131 tw_ipv6_offset : 16;
131 kmemcheck_bitfield_end(flags); 132 kmemcheck_bitfield_end(flags);
132 unsigned long tw_ttd; 133 unsigned long tw_ttd;
diff --git a/include/net/ip.h b/include/net/ip.h
index c7e066a1c611..eca0ef7a495e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,6 +165,7 @@ struct ip_reply_arg {
165 int csumoffset; /* u16 offset of csum in iov[0].iov_base */ 165 int csumoffset; /* u16 offset of csum in iov[0].iov_base */
166 /* -1 if not needed */ 166 /* -1 if not needed */
167 int bound_dev_if; 167 int bound_dev_if;
168 u8 tos;
168}; 169};
169 170
170#define IP_REPLY_ARG_NOSRCCHECK 1 171#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -175,7 +176,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
175} 176}
176 177
177void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, 178void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
178 struct ip_reply_arg *arg, unsigned int len); 179 const struct ip_reply_arg *arg, unsigned int len);
179 180
180struct ipv4_config { 181struct ipv4_config {
181 int log_martians; 182 int log_martians;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 3c8dfa16614d..44d65d546e30 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
183 tw->tw_daddr = inet->inet_daddr; 183 tw->tw_daddr = inet->inet_daddr;
184 tw->tw_rcv_saddr = inet->inet_rcv_saddr; 184 tw->tw_rcv_saddr = inet->inet_rcv_saddr;
185 tw->tw_bound_dev_if = sk->sk_bound_dev_if; 185 tw->tw_bound_dev_if = sk->sk_bound_dev_if;
186 tw->tw_tos = inet->tos;
186 tw->tw_num = inet->inet_num; 187 tw->tw_num = inet->inet_num;
187 tw->tw_state = TCP_TIME_WAIT; 188 tw->tw_state = TCP_TIME_WAIT;
188 tw->tw_substate = state; 189 tw->tw_substate = state;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e1374ab034bb..0bc95f3977d2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1466,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1466 * structure to pass arguments. 1466 * structure to pass arguments.
1467 */ 1467 */
1468void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, 1468void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1469 struct ip_reply_arg *arg, unsigned int len) 1469 const struct ip_reply_arg *arg, unsigned int len)
1470{ 1470{
1471 struct inet_sock *inet = inet_sk(sk); 1471 struct inet_sock *inet = inet_sk(sk);
1472 struct ip_options_data replyopts; 1472 struct ip_options_data replyopts;
@@ -1489,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1489 } 1489 }
1490 1490
1491 flowi4_init_output(&fl4, arg->bound_dev_if, 0, 1491 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1492 RT_TOS(ip_hdr(skb)->tos), 1492 RT_TOS(arg->tos),
1493 RT_SCOPE_UNIVERSE, sk->sk_protocol, 1493 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1494 ip_reply_arg_flowi_flags(arg), 1494 ip_reply_arg_flowi_flags(arg),
1495 daddr, rt->rt_spec_dst, 1495 daddr, rt->rt_spec_dst,
@@ -1506,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1506 with locally disabled BH and that sk cannot be already spinlocked. 1506 with locally disabled BH and that sk cannot be already spinlocked.
1507 */ 1507 */
1508 bh_lock_sock(sk); 1508 bh_lock_sock(sk);
1509 inet->tos = ip_hdr(skb)->tos; 1509 inet->tos = arg->tos;
1510 sk->sk_priority = skb->priority; 1510 sk->sk_priority = skb->priority;
1511 sk->sk_protocol = ip_hdr(skb)->protocol; 1511 sk->sk_protocol = ip_hdr(skb)->protocol;
1512 sk->sk_bound_dev_if = arg->bound_dev_if; 1512 sk->sk_bound_dev_if = arg->bound_dev_if;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1dad7e92f005..0ea10eefa60f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
652 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 652 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
653 653
654 net = dev_net(skb_dst(skb)->dev); 654 net = dev_net(skb_dst(skb)->dev);
655 arg.tos = ip_hdr(skb)->tos;
655 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 656 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
656 &arg, arg.iov[0].iov_len); 657 &arg, arg.iov[0].iov_len);
657 658
@@ -666,7 +667,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
666static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 667static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
667 u32 win, u32 ts, int oif, 668 u32 win, u32 ts, int oif,
668 struct tcp_md5sig_key *key, 669 struct tcp_md5sig_key *key,
669 int reply_flags) 670 int reply_flags, u8 tos)
670{ 671{
671 const struct tcphdr *th = tcp_hdr(skb); 672 const struct tcphdr *th = tcp_hdr(skb);
672 struct { 673 struct {
@@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
726 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 727 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
727 if (oif) 728 if (oif)
728 arg.bound_dev_if = oif; 729 arg.bound_dev_if = oif;
729 730 arg.tos = tos;
730 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 731 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
731 &arg, arg.iov[0].iov_len); 732 &arg, arg.iov[0].iov_len);
732 733
@@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
743 tcptw->tw_ts_recent, 744 tcptw->tw_ts_recent,
744 tw->tw_bound_dev_if, 745 tw->tw_bound_dev_if,
745 tcp_twsk_md5_key(tcptw), 746 tcp_twsk_md5_key(tcptw),
746 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 747 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
748 tw->tw_tos
747 ); 749 );
748 750
749 inet_twsk_put(tw); 751 inet_twsk_put(tw);
@@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
757 req->ts_recent, 759 req->ts_recent,
758 0, 760 0,
759 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), 761 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
760 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); 762 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
763 ip_hdr(skb)->tos);
761} 764}
762 765
763/* 766/*