summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrey Vagin <avagin@openvz.org> 2016-06-27 18:33:56 -0400
committer David S. Miller <davem@davemloft.net> 2016-06-30 08:15:31 -0400
commit b1ed4c4fa9a5ccf325184fd90edc50978ef6e33a (patch)
tree eb298a85a5f016c52ad375f351bf171eec99152a
parent 641f7e405ed208cfeb6b52145252675b51c43180 (diff)
tcp: add an ability to dump and restore window parameters
We found that sometimes a restored tcp socket doesn't work. The cause of this bug is incorrect window parameters: in this case tcp_acceptable_seq() returns tcp_wnd_end(tp) instead of tp->snd_nxt. The other side drops packets with this seq, because seq is less than tp->rcv_nxt (see tcp_sequence()). Data from a send queue is sent only if there is enough space in a window, so when we restore unacked data, we need to expand the window to fit this data. This was done in the first version of this patch: "tcp: extend window to fit all restored unacked data in a send queue". Then Alexey recommended that I restore the window parameters instead of adjusting them according to the data in the send queue. This sounds reasonable. rcv_wnd has to be restored, because it was reported to the other side and the offered window is never shrunk. One of the reasons why we need to restore snd_wnd was described above. Cc: Pavel Emelyanov <xemul@parallels.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> Cc: James Morris <jmorris@namei.org> Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org> Cc: Patrick McHardy <kaber@trash.net> Signed-off-by: Andrey Vagin <avagin@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/uapi/linux/tcp.h 10
-rw-r--r-- net/ipv4/tcp.c 57
2 files changed, 67 insertions, 0 deletions
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 53e8e3fe6b1b..482898fc433a 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -115,12 +115,22 @@ enum {
115#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ 115#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */
116#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ 116#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */
117#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ 117#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */
118#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */
118 119
119struct tcp_repair_opt { 120struct tcp_repair_opt {
120 __u32 opt_code; 121 __u32 opt_code;
121 __u32 opt_val; 122 __u32 opt_val;
122}; 123};
123 124
125struct tcp_repair_window {
126 __u32 snd_wl1;
127 __u32 snd_wnd;
128 __u32 max_window;
129
130 __u32 rcv_wnd;
131 __u32 rcv_wup;
132};
133
124enum { 134enum {
125 TCP_NO_QUEUE, 135 TCP_NO_QUEUE,
126 TCP_RECV_QUEUE, 136 TCP_RECV_QUEUE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5c7ed147449c..108ef2a6665c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
2277 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); 2277 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
2278} 2278}
2279 2279
2280static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
2281{
2282 struct tcp_repair_window opt;
2283
2284 if (!tp->repair)
2285 return -EPERM;
2286
2287 if (len != sizeof(opt))
2288 return -EINVAL;
2289
2290 if (copy_from_user(&opt, optbuf, sizeof(opt)))
2291 return -EFAULT;
2292
2293 if (opt.max_window < opt.snd_wnd)
2294 return -EINVAL;
2295
2296 if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
2297 return -EINVAL;
2298
2299 if (after(opt.rcv_wup, tp->rcv_nxt))
2300 return -EINVAL;
2301
2302 tp->snd_wl1 = opt.snd_wl1;
2303 tp->snd_wnd = opt.snd_wnd;
2304 tp->max_window = opt.max_window;
2305
2306 tp->rcv_wnd = opt.rcv_wnd;
2307 tp->rcv_wup = opt.rcv_wup;
2308
2309 return 0;
2310}
2311
2280static int tcp_repair_options_est(struct tcp_sock *tp, 2312static int tcp_repair_options_est(struct tcp_sock *tp,
2281 struct tcp_repair_opt __user *optbuf, unsigned int len) 2313 struct tcp_repair_opt __user *optbuf, unsigned int len)
2282{ 2314{
@@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2604 else 2636 else
2605 tp->tsoffset = val - tcp_time_stamp; 2637 tp->tsoffset = val - tcp_time_stamp;
2606 break; 2638 break;
2639 case TCP_REPAIR_WINDOW:
2640 err = tcp_repair_set_window(tp, optval, optlen);
2641 break;
2607 case TCP_NOTSENT_LOWAT: 2642 case TCP_NOTSENT_LOWAT:
2608 tp->notsent_lowat = val; 2643 tp->notsent_lowat = val;
2609 sk->sk_write_space(sk); 2644 sk->sk_write_space(sk);
@@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2860 return -EINVAL; 2895 return -EINVAL;
2861 break; 2896 break;
2862 2897
2898 case TCP_REPAIR_WINDOW: {
2899 struct tcp_repair_window opt;
2900
2901 if (get_user(len, optlen))
2902 return -EFAULT;
2903
2904 if (len != sizeof(opt))
2905 return -EINVAL;
2906
2907 if (!tp->repair)
2908 return -EPERM;
2909
2910 opt.snd_wl1 = tp->snd_wl1;
2911 opt.snd_wnd = tp->snd_wnd;
2912 opt.max_window = tp->max_window;
2913 opt.rcv_wnd = tp->rcv_wnd;
2914 opt.rcv_wup = tp->rcv_wup;
2915
2916 if (copy_to_user(optval, &opt, len))
2917 return -EFAULT;
2918 return 0;
2919 }
2863 case TCP_QUEUE_SEQ: 2920 case TCP_QUEUE_SEQ:
2864 if (tp->repair_queue == TCP_SEND_QUEUE) 2921 if (tp->repair_queue == TCP_SEND_QUEUE)
2865 val = tp->write_seq; 2922 val = tp->write_seq;