diff options
author | Andrey Vagin <avagin@openvz.org> | 2016-06-27 18:33:56 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-30 08:15:31 -0400 |
commit | b1ed4c4fa9a5ccf325184fd90edc50978ef6e33a (patch) | |
tree | eb298a85a5f016c52ad375f351bf171eec99152a | |
parent | 641f7e405ed208cfeb6b52145252675b51c43180 (diff) |
tcp: add an ability to dump and restore window parameters
We found that sometimes a restored tcp socket doesn't work.
The reason for this bug is incorrect window parameters: in this case
tcp_acceptable_seq() returns tcp_wnd_end(tp) instead of tp->snd_nxt. The
other side drops packets with this seq, because seq is less than
tp->rcv_nxt ( tcp_sequence() ).
Data from a send queue is sent only if there is enough space in a
window, so when we restore unacked data, we need to expand a window to
fit this data.
This was done in the first version of this patch:
"tcp: extend window to fit all restored unacked data in a send queue"
Then Alexey recommended that I restore the window parameters instead of
adjusting them according to the data in the send queue. This sounds reasonable.
rcv_wnd has to be restored, because it was reported to another side
and the offered window is never shrunk.
One of the reasons why we need to restore snd_wnd was described above.
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/uapi/linux/tcp.h | 10 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 57 |
2 files changed, 67 insertions, 0 deletions
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 53e8e3fe6b1b..482898fc433a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h | |||
@@ -115,12 +115,22 @@ enum { | |||
115 | #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ | 115 | #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ |
116 | #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ | 116 | #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ |
117 | #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ | 117 | #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ |
118 | #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ | ||
118 | 119 | ||
119 | struct tcp_repair_opt { | 120 | struct tcp_repair_opt { |
120 | __u32 opt_code; | 121 | __u32 opt_code; |
121 | __u32 opt_val; | 122 | __u32 opt_val; |
122 | }; | 123 | }; |
123 | 124 | ||
125 | struct tcp_repair_window { | ||
126 | __u32 snd_wl1; | ||
127 | __u32 snd_wnd; | ||
128 | __u32 max_window; | ||
129 | |||
130 | __u32 rcv_wnd; | ||
131 | __u32 rcv_wup; | ||
132 | }; | ||
133 | |||
124 | enum { | 134 | enum { |
125 | TCP_NO_QUEUE, | 135 | TCP_NO_QUEUE, |
126 | TCP_RECV_QUEUE, | 136 | TCP_RECV_QUEUE, |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c7ed147449c..108ef2a6665c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk) | |||
2277 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); | 2277 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); |
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) | ||
2281 | { | ||
2282 | struct tcp_repair_window opt; | ||
2283 | |||
2284 | if (!tp->repair) | ||
2285 | return -EPERM; | ||
2286 | |||
2287 | if (len != sizeof(opt)) | ||
2288 | return -EINVAL; | ||
2289 | |||
2290 | if (copy_from_user(&opt, optbuf, sizeof(opt))) | ||
2291 | return -EFAULT; | ||
2292 | |||
2293 | if (opt.max_window < opt.snd_wnd) | ||
2294 | return -EINVAL; | ||
2295 | |||
2296 | if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd)) | ||
2297 | return -EINVAL; | ||
2298 | |||
2299 | if (after(opt.rcv_wup, tp->rcv_nxt)) | ||
2300 | return -EINVAL; | ||
2301 | |||
2302 | tp->snd_wl1 = opt.snd_wl1; | ||
2303 | tp->snd_wnd = opt.snd_wnd; | ||
2304 | tp->max_window = opt.max_window; | ||
2305 | |||
2306 | tp->rcv_wnd = opt.rcv_wnd; | ||
2307 | tp->rcv_wup = opt.rcv_wup; | ||
2308 | |||
2309 | return 0; | ||
2310 | } | ||
2311 | |||
2280 | static int tcp_repair_options_est(struct tcp_sock *tp, | 2312 | static int tcp_repair_options_est(struct tcp_sock *tp, |
2281 | struct tcp_repair_opt __user *optbuf, unsigned int len) | 2313 | struct tcp_repair_opt __user *optbuf, unsigned int len) |
2282 | { | 2314 | { |
@@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2604 | else | 2636 | else |
2605 | tp->tsoffset = val - tcp_time_stamp; | 2637 | tp->tsoffset = val - tcp_time_stamp; |
2606 | break; | 2638 | break; |
2639 | case TCP_REPAIR_WINDOW: | ||
2640 | err = tcp_repair_set_window(tp, optval, optlen); | ||
2641 | break; | ||
2607 | case TCP_NOTSENT_LOWAT: | 2642 | case TCP_NOTSENT_LOWAT: |
2608 | tp->notsent_lowat = val; | 2643 | tp->notsent_lowat = val; |
2609 | sk->sk_write_space(sk); | 2644 | sk->sk_write_space(sk); |
@@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2860 | return -EINVAL; | 2895 | return -EINVAL; |
2861 | break; | 2896 | break; |
2862 | 2897 | ||
2898 | case TCP_REPAIR_WINDOW: { | ||
2899 | struct tcp_repair_window opt; | ||
2900 | |||
2901 | if (get_user(len, optlen)) | ||
2902 | return -EFAULT; | ||
2903 | |||
2904 | if (len != sizeof(opt)) | ||
2905 | return -EINVAL; | ||
2906 | |||
2907 | if (!tp->repair) | ||
2908 | return -EPERM; | ||
2909 | |||
2910 | opt.snd_wl1 = tp->snd_wl1; | ||
2911 | opt.snd_wnd = tp->snd_wnd; | ||
2912 | opt.max_window = tp->max_window; | ||
2913 | opt.rcv_wnd = tp->rcv_wnd; | ||
2914 | opt.rcv_wup = tp->rcv_wup; | ||
2915 | |||
2916 | if (copy_to_user(optval, &opt, len)) | ||
2917 | return -EFAULT; | ||
2918 | return 0; | ||
2919 | } | ||
2863 | case TCP_QUEUE_SEQ: | 2920 | case TCP_QUEUE_SEQ: |
2864 | if (tp->repair_queue == TCP_SEND_QUEUE) | 2921 | if (tp->repair_queue == TCP_SEND_QUEUE) |
2865 | val = tp->write_seq; | 2922 | val = tp->write_seq; |