aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-02-13 13:22:24 -0500
committerDavid S. Miller <davem@davemloft.net>2013-02-13 13:22:24 -0500
commitb8fa4100350432504df438014e2e5e9c1bbb6325 (patch)
tree0fc416746eeca68bbba228958636dd919fc3785f
parentd0023f820e003857248d14f2213ac3930283f16c (diff)
parentee684b6f2830047d19877e5547989740f18b1a5d (diff)
Merge branch 'tcp_tsoffset'
Andrey Vagin says: ==================== If a TCP socket gets live-migrated from one box to another, the timestamps (which are typically ON) will get screwed up -- the new kernel will generate TS values that have nothing to do with what they were on dump. The solution is to yet again fix the kernel and put a "timestamp offset" on a socket. A socket offset is added in places where the externally visible TCP timestamp option is parsed/initialized. Connections in the SYN_RECV state are not supported; the global tcp_time_stamp is used for them, because repair mode doesn't support this state. In the future it can be implemented in a similar way as for TIME_WAIT sockets. For time-wait sockets the offset is inherited from the corresponding tcp_sock. A per-socket offset can be set only for sockets in repair mode. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/tcp.h3
-rw-r--r--include/uapi/linux/tcp.h1
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv6/tcp_ipv6.c22
8 files changed, 47 insertions, 17 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6d0d46138ae8..f28408c07dc2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -162,6 +162,8 @@ struct tcp_sock {
162 u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ 162 u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
163 u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ 163 u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
164 164
165 u32 tsoffset; /* timestamp offset */
166
165 struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ 167 struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
166 unsigned long tsq_flags; 168 unsigned long tsq_flags;
167 169
@@ -353,6 +355,7 @@ struct tcp_timewait_sock {
353 u32 tw_rcv_nxt; 355 u32 tw_rcv_nxt;
354 u32 tw_snd_nxt; 356 u32 tw_snd_nxt;
355 u32 tw_rcv_wnd; 357 u32 tw_rcv_wnd;
358 u32 tw_ts_offset;
356 u32 tw_ts_recent; 359 u32 tw_ts_recent;
357 long tw_ts_recent_stamp; 360 long tw_ts_recent_stamp;
358#ifdef CONFIG_TCP_MD5SIG 361#ifdef CONFIG_TCP_MD5SIG
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index e962faa5ab0d..6b1ead0b0c9d 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -111,6 +111,7 @@ enum {
111#define TCP_QUEUE_SEQ 21 111#define TCP_QUEUE_SEQ 21
112#define TCP_REPAIR_OPTIONS 22 112#define TCP_REPAIR_OPTIONS 22
113#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ 113#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
114#define TCP_TIMESTAMP 24
114 115
115struct tcp_repair_opt { 116struct tcp_repair_opt {
116 __u32 opt_code; 117 __u32 opt_code;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2c7e5963c2ea..801b07b796f0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk)
400 tcp_enable_early_retrans(tp); 400 tcp_enable_early_retrans(tp);
401 icsk->icsk_ca_ops = &tcp_init_congestion_ops; 401 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
402 402
403 tp->tsoffset = 0;
404
403 sk->sk_state = TCP_CLOSE; 405 sk->sk_state = TCP_CLOSE;
404 406
405 sk->sk_write_space = sk_stream_write_space; 407 sk->sk_write_space = sk_stream_write_space;
@@ -2712,6 +2714,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2712 else 2714 else
2713 err = -EINVAL; 2715 err = -EINVAL;
2714 break; 2716 break;
2717 case TCP_TIMESTAMP:
2718 if (!tp->repair)
2719 err = -EPERM;
2720 else
2721 tp->tsoffset = val - tcp_time_stamp;
2722 break;
2715 default: 2723 default:
2716 err = -ENOPROTOOPT; 2724 err = -ENOPROTOOPT;
2717 break; 2725 break;
@@ -2960,6 +2968,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2960 case TCP_USER_TIMEOUT: 2968 case TCP_USER_TIMEOUT:
2961 val = jiffies_to_msecs(icsk->icsk_user_timeout); 2969 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2962 break; 2970 break;
2971 case TCP_TIMESTAMP:
2972 val = tcp_time_stamp + tp->tsoffset;
2973 break;
2963 default: 2974 default:
2964 return -ENOPROTOOPT; 2975 return -ENOPROTOOPT;
2965 } 2976 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea678b62e94f..d9bfaea34322 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3860,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
3860 ++ptr; 3860 ++ptr;
3861 tp->rx_opt.rcv_tsval = ntohl(*ptr); 3861 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3862 ++ptr; 3862 ++ptr;
3863 tp->rx_opt.rcv_tsecr = ntohl(*ptr); 3863 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3864 return true; 3864 return true;
3865 } 3865 }
3866 return false; 3866 return false;
@@ -3884,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
3884 if (tcp_parse_aligned_timestamp(tp, th)) 3884 if (tcp_parse_aligned_timestamp(tp, th))
3885 return true; 3885 return true;
3886 } 3886 }
3887
3887 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); 3888 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
3889 if (tp->rx_opt.saw_tstamp)
3890 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3891
3888 return true; 3892 return true;
3889} 3893}
3890 3894
@@ -5665,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5665 int saved_clamp = tp->rx_opt.mss_clamp; 5669 int saved_clamp = tp->rx_opt.mss_clamp;
5666 5670
5667 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); 5671 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
5672 if (tp->rx_opt.saw_tstamp)
5673 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5668 5674
5669 if (th->ack) { 5675 if (th->ack) {
5670 /* rfc793: 5676 /* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0eaf685bddc9..77f5050efc8d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -726,7 +726,7 @@ release_sk1:
726 */ 726 */
727 727
728static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 728static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
729 u32 win, u32 ts, int oif, 729 u32 win, u32 tsval, u32 tsecr, int oif,
730 struct tcp_md5sig_key *key, 730 struct tcp_md5sig_key *key,
731 int reply_flags, u8 tos) 731 int reply_flags, u8 tos)
732{ 732{
@@ -747,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
747 747
748 arg.iov[0].iov_base = (unsigned char *)&rep; 748 arg.iov[0].iov_base = (unsigned char *)&rep;
749 arg.iov[0].iov_len = sizeof(rep.th); 749 arg.iov[0].iov_len = sizeof(rep.th);
750 if (ts) { 750 if (tsecr) {
751 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 751 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
752 (TCPOPT_TIMESTAMP << 8) | 752 (TCPOPT_TIMESTAMP << 8) |
753 TCPOLEN_TIMESTAMP); 753 TCPOLEN_TIMESTAMP);
754 rep.opt[1] = htonl(tcp_time_stamp); 754 rep.opt[1] = htonl(tsval);
755 rep.opt[2] = htonl(ts); 755 rep.opt[2] = htonl(tsecr);
756 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 756 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
757 } 757 }
758 758
@@ -767,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
767 767
768#ifdef CONFIG_TCP_MD5SIG 768#ifdef CONFIG_TCP_MD5SIG
769 if (key) { 769 if (key) {
770 int offset = (ts) ? 3 : 0; 770 int offset = (tsecr) ? 3 : 0;
771 771
772 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 772 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
773 (TCPOPT_NOP << 16) | 773 (TCPOPT_NOP << 16) |
@@ -802,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
802 802
803 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 803 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
804 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 804 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
805 tcp_time_stamp + tcptw->tw_ts_offset,
805 tcptw->tw_ts_recent, 806 tcptw->tw_ts_recent,
806 tw->tw_bound_dev_if, 807 tw->tw_bound_dev_if,
807 tcp_twsk_md5_key(tcptw), 808 tcp_twsk_md5_key(tcptw),
@@ -821,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
821 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 822 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
822 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 823 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
823 tcp_rsk(req)->rcv_nxt, req->rcv_wnd, 824 tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
825 tcp_time_stamp,
824 req->ts_recent, 826 req->ts_recent,
825 0, 827 0,
826 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 828 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f0409287b5f4..b83a49cc3816 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
102 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 102 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
103 103
104 if (tmp_opt.saw_tstamp) { 104 if (tmp_opt.saw_tstamp) {
105 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
105 tmp_opt.ts_recent = tcptw->tw_ts_recent; 106 tmp_opt.ts_recent = tcptw->tw_ts_recent;
106 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 107 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
107 paws_reject = tcp_paws_reject(&tmp_opt, th->rst); 108 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
@@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
288 tcptw->tw_rcv_wnd = tcp_receive_window(tp); 289 tcptw->tw_rcv_wnd = tcp_receive_window(tp);
289 tcptw->tw_ts_recent = tp->rx_opt.ts_recent; 290 tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
290 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; 291 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
292 tcptw->tw_ts_offset = tp->tsoffset;
291 293
292#if IS_ENABLED(CONFIG_IPV6) 294#if IS_ENABLED(CONFIG_IPV6)
293 if (tw->tw_family == PF_INET6) { 295 if (tw->tw_family == PF_INET6) {
@@ -499,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
499 newtp->rx_opt.ts_recent_stamp = 0; 501 newtp->rx_opt.ts_recent_stamp = 0;
500 newtp->tcp_header_len = sizeof(struct tcphdr); 502 newtp->tcp_header_len = sizeof(struct tcphdr);
501 } 503 }
504 newtp->tsoffset = 0;
502#ifdef CONFIG_TCP_MD5SIG 505#ifdef CONFIG_TCP_MD5SIG
503 newtp->md5sig_info = NULL; /*XXX*/ 506 newtp->md5sig_info = NULL; /*XXX*/
504 if (newtp->af_specific->md5_lookup(sk, newsk)) 507 if (newtp->af_specific->md5_lookup(sk, newsk))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 367e2ec01da1..564bf89d9fd3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
622 622
623 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { 623 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
624 opts->options |= OPTION_TS; 624 opts->options |= OPTION_TS;
625 opts->tsval = TCP_SKB_CB(skb)->when; 625 opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
626 opts->tsecr = tp->rx_opt.ts_recent; 626 opts->tsecr = tp->rx_opt.ts_recent;
627 remaining -= TCPOLEN_TSTAMP_ALIGNED; 627 remaining -= TCPOLEN_TSTAMP_ALIGNED;
628 } 628 }
@@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
806 806
807 if (likely(tp->rx_opt.tstamp_ok)) { 807 if (likely(tp->rx_opt.tstamp_ok)) {
808 opts->options |= OPTION_TS; 808 opts->options |= OPTION_TS;
809 opts->tsval = tcb ? tcb->when : 0; 809 opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
810 opts->tsecr = tp->rx_opt.ts_recent; 810 opts->tsecr = tp->rx_opt.ts_recent;
811 size += TCPOLEN_TSTAMP_ALIGNED; 811 size += TCPOLEN_TSTAMP_ALIGNED;
812 } 812 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bbb28ae7e5f3..9b6460055df5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -713,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
713#endif 713#endif
714 714
715static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, 715static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
716 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) 716 u32 tsval, u32 tsecr,
717 struct tcp_md5sig_key *key, int rst, u8 tclass)
717{ 718{
718 const struct tcphdr *th = tcp_hdr(skb); 719 const struct tcphdr *th = tcp_hdr(skb);
719 struct tcphdr *t1; 720 struct tcphdr *t1;
@@ -725,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
725 struct dst_entry *dst; 726 struct dst_entry *dst;
726 __be32 *topt; 727 __be32 *topt;
727 728
728 if (ts) 729 if (tsecr)
729 tot_len += TCPOLEN_TSTAMP_ALIGNED; 730 tot_len += TCPOLEN_TSTAMP_ALIGNED;
730#ifdef CONFIG_TCP_MD5SIG 731#ifdef CONFIG_TCP_MD5SIG
731 if (key) 732 if (key)
@@ -755,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
755 756
756 topt = (__be32 *)(t1 + 1); 757 topt = (__be32 *)(t1 + 1);
757 758
758 if (ts) { 759 if (tsecr) {
759 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 760 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
760 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 761 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
761 *topt++ = htonl(tcp_time_stamp); 762 *topt++ = htonl(tsval);
762 *topt++ = htonl(ts); 763 *topt++ = htonl(tsecr);
763 } 764 }
764 765
765#ifdef CONFIG_TCP_MD5SIG 766#ifdef CONFIG_TCP_MD5SIG
@@ -860,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
860 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 861 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
861 (th->doff << 2); 862 (th->doff << 2);
862 863
863 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); 864 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0);
864 865
865#ifdef CONFIG_TCP_MD5SIG 866#ifdef CONFIG_TCP_MD5SIG
866release_sk1: 867release_sk1:
@@ -871,10 +872,11 @@ release_sk1:
871#endif 872#endif
872} 873}
873 874
874static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, 875static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
876 u32 win, u32 tsval, u32 tsecr,
875 struct tcp_md5sig_key *key, u8 tclass) 877 struct tcp_md5sig_key *key, u8 tclass)
876{ 878{
877 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); 879 tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass);
878} 880}
879 881
880static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 882static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -884,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
884 886
885 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 887 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
886 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 888 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
889 tcp_time_stamp + tcptw->tw_ts_offset,
887 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), 890 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
888 tw->tw_tclass); 891 tw->tw_tclass);
889 892
@@ -893,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
893static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 896static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
894 struct request_sock *req) 897 struct request_sock *req)
895{ 898{
896 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 899 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
900 req->rcv_wnd, tcp_time_stamp, req->ts_recent,
897 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); 901 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
898} 902}
899 903