aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@parallels.com>2012-04-18 23:41:57 -0400
committerDavid S. Miller <davem@davemloft.net>2012-04-21 15:52:25 -0400
commitb139ba4e90dccbf4cd4efb112af96a5c9e0b098c (patch)
treec955ff09255e74b8cc991dfa6f67853668a3b3c4 /net/ipv4/tcp.c
parent5e6a3ce6573f0c519d1ff57df60e3877bb2d3151 (diff)
tcp: Repair connection-time negotiated parameters
There are options which are set up on a socket while performing the TCP handshake. We need to restore them on a socket while repairing it. A new sockoption accepts a buffer and parses it. The buffer should be a CODE:VALUE sequence of bytes, where CODE is the standard option code and VALUE is the respective value. Only 4 options need to be handled on a repaired socket. To read 3 out of 4 of these options the TCP_INFO sockoption can be used. The ability to get the last one (the mss_clamp) was added by the previous patch. Now the restore. Three of these options -- timestamp_ok, mss_clamp and snd_wscale -- are just restored on a socket. The sack_ok flag has 2 issues. First, whether or not to do sacks at all. This flag is just read and set back. No other sack info is saved or restored, since according to the standard and the code dropping all sack-ed segments is OK: the sender will resubmit them again, so after the repair we will probably experience a pause in the connection. Next, the fack bit. It's just set back on a socket if the respective sysctl is set. No collected stats about packet flow are preserved. As far as I see (please correct me if I'm wrong) the fack-based congestion algorithm survives dropping all of the stats and repairs itself eventually, probably losing some performance for that period. Signed-off-by: Pavel Emelyanov <xemul@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c71
1 files changed, 71 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b4e690ddb08c..3ce3bd031f33 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2218,6 +2218,68 @@ static inline int tcp_can_repair_sock(struct sock *sk)
2218 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); 2218 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
2219} 2219}
2220 2220
2221static int tcp_repair_options_est(struct tcp_sock *tp, char __user *optbuf, unsigned int len)
2222{
2223 /*
2224 * Options are stored in CODE:VALUE form where CODE is 8bit and VALUE
2225 * fits the respective TCPOLEN_ size
2226 */
2227
2228 while (len > 0) {
2229 u8 opcode;
2230
2231 if (get_user(opcode, optbuf))
2232 return -EFAULT;
2233
2234 optbuf++;
2235 len--;
2236
2237 switch (opcode) {
2238 case TCPOPT_MSS: {
2239 u16 in_mss;
2240
2241 if (len < sizeof(in_mss))
2242 return -ENODATA;
2243 if (get_user(in_mss, optbuf))
2244 return -EFAULT;
2245
2246 tp->rx_opt.mss_clamp = in_mss;
2247
2248 optbuf += sizeof(in_mss);
2249 len -= sizeof(in_mss);
2250 break;
2251 }
2252 case TCPOPT_WINDOW: {
2253 u8 wscale;
2254
2255 if (len < sizeof(wscale))
2256 return -ENODATA;
2257 if (get_user(wscale, optbuf))
2258 return -EFAULT;
2259
2260 if (wscale > 14)
2261 return -EFBIG;
2262
2263 tp->rx_opt.snd_wscale = wscale;
2264
2265 optbuf += sizeof(wscale);
2266 len -= sizeof(wscale);
2267 break;
2268 }
2269 case TCPOPT_SACK_PERM:
2270 tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
2271 if (sysctl_tcp_fack)
2272 tcp_enable_fack(tp);
2273 break;
2274 case TCPOPT_TIMESTAMP:
2275 tp->rx_opt.tstamp_ok = 1;
2276 break;
2277 }
2278 }
2279
2280 return 0;
2281}
2282
2221/* 2283/*
2222 * Socket option code for TCP. 2284 * Socket option code for TCP.
2223 */ 2285 */
@@ -2426,6 +2488,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2426 err = -EINVAL; 2488 err = -EINVAL;
2427 break; 2489 break;
2428 2490
2491 case TCP_REPAIR_OPTIONS:
2492 if (!tp->repair)
2493 err = -EINVAL;
2494 else if (sk->sk_state == TCP_ESTABLISHED)
2495 err = tcp_repair_options_est(tp, optval, optlen);
2496 else
2497 err = -EPERM;
2498 break;
2499
2429 case TCP_CORK: 2500 case TCP_CORK:
2430 /* When set indicates to always queue non-full frames. 2501 /* When set indicates to always queue non-full frames.
2431 * Later the user clears this option and we transmit 2502 * Later the user clears this option and we transmit