aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@parallels.com>2012-04-18 23:40:39 -0400
committerDavid S. Miller <davem@davemloft.net>2012-04-21 15:52:25 -0400
commitee9952831cfd0bbe834f4a26489d7dce74582e37 (patch)
tree64c195fa45e1a200f38d68751161d8e06dfb5a6c /net
parent370816aef0c5436c2adbec3966038f36ca326933 (diff)
tcp: Initial repair mode
This includes (according to the previous description): * TCP_REPAIR sockoption This one just puts the socket in/out of the repair mode. Allowed for CAP_NET_ADMIN and for closed/established sockets only. When repair mode is turned off and the socket happens to be in the established state the window probe is sent to the peer to 'unlock' the connection. * TCP_REPAIR_QUEUE sockoption This one sets the queue which we're about to repair. The 'no-queue' is set by default. * TCP_QUEUE_SEQ sockoption Sets the write_seq/rcv_nxt of a selected repaired queue. Allowed for TCP_CLOSE-d sockets only. When the socket changes its state the other seq-s are changed by the kernel according to the protocol rules (most of the existing code is actually reused). * Ability to forcibly bind a socket to a port The sk->sk_reuse is set to SK_FORCE_REUSE. * Immediate connect modification The connect syscall initializes the connection, then directly jumps to the code which finalizes it. * Silent close modification The close just aborts the connection (similar to SO_LINGER with 0 time) but without sending any FIN/RST-s to peer. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c68
-rw-r--r--net/ipv4/tcp_ipv4.c19
-rw-r--r--net/ipv4/tcp_output.c16
3 files changed, 96 insertions, 7 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bb4200f56158..e38d6f240321 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1935,7 +1935,9 @@ void tcp_close(struct sock *sk, long timeout)
1935 * advertise a zero window, then kill -9 the FTP client, wheee... 1935 * advertise a zero window, then kill -9 the FTP client, wheee...
1936 * Note: timeout is always zero in such a case. 1936 * Note: timeout is always zero in such a case.
1937 */ 1937 */
1938 if (data_was_unread) { 1938 if (unlikely(tcp_sk(sk)->repair)) {
1939 sk->sk_prot->disconnect(sk, 0);
1940 } else if (data_was_unread) {
1939 /* Unread data was tossed, zap the connection. */ 1941 /* Unread data was tossed, zap the connection. */
1940 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); 1942 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
1941 tcp_set_state(sk, TCP_CLOSE); 1943 tcp_set_state(sk, TCP_CLOSE);
@@ -2074,6 +2076,8 @@ int tcp_disconnect(struct sock *sk, int flags)
2074 /* ABORT function of RFC793 */ 2076 /* ABORT function of RFC793 */
2075 if (old_state == TCP_LISTEN) { 2077 if (old_state == TCP_LISTEN) {
2076 inet_csk_listen_stop(sk); 2078 inet_csk_listen_stop(sk);
2079 } else if (unlikely(tp->repair)) {
2080 sk->sk_err = ECONNABORTED;
2077 } else if (tcp_need_reset(old_state) || 2081 } else if (tcp_need_reset(old_state) ||
2078 (tp->snd_nxt != tp->write_seq && 2082 (tp->snd_nxt != tp->write_seq &&
2079 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 2083 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2125,6 +2129,12 @@ int tcp_disconnect(struct sock *sk, int flags)
2125} 2129}
2126EXPORT_SYMBOL(tcp_disconnect); 2130EXPORT_SYMBOL(tcp_disconnect);
2127 2131
/*
 * Check whether repair mode may be toggled on this socket.
 *
 * Returns non-zero only when the caller has CAP_NET_ADMIN and the socket
 * state is either TCP_CLOSE or TCP_ESTABLISHED; returns 0 otherwise.
 */
2132static inline int tcp_can_repair_sock(struct sock *sk)
2133{
2134 return capable(CAP_NET_ADMIN) &&
	/* Turn the state into its TCPF_* bit and test it against the allowed set. */
2135 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
2136}
2137
2128/* 2138/*
2129 * Socket option code for TCP. 2139 * Socket option code for TCP.
2130 */ 2140 */
@@ -2297,6 +2307,42 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2297 tp->thin_dupack = val; 2307 tp->thin_dupack = val;
2298 break; 2308 break;
2299 2309
2310 case TCP_REPAIR:
2311 if (!tcp_can_repair_sock(sk))
2312 err = -EPERM;
2313 else if (val == 1) {
2314 tp->repair = 1;
2315 sk->sk_reuse = SK_FORCE_REUSE;
2316 tp->repair_queue = TCP_NO_QUEUE;
2317 } else if (val == 0) {
2318 tp->repair = 0;
2319 sk->sk_reuse = SK_NO_REUSE;
2320 tcp_send_window_probe(sk);
2321 } else
2322 err = -EINVAL;
2323
2324 break;
2325
2326 case TCP_REPAIR_QUEUE:
2327 if (!tp->repair)
2328 err = -EPERM;
2329 else if (val < TCP_QUEUES_NR)
2330 tp->repair_queue = val;
2331 else
2332 err = -EINVAL;
2333 break;
2334
2335 case TCP_QUEUE_SEQ:
2336 if (sk->sk_state != TCP_CLOSE)
2337 err = -EPERM;
2338 else if (tp->repair_queue == TCP_SEND_QUEUE)
2339 tp->write_seq = val;
2340 else if (tp->repair_queue == TCP_RECV_QUEUE)
2341 tp->rcv_nxt = val;
2342 else
2343 err = -EINVAL;
2344 break;
2345
2300 case TCP_CORK: 2346 case TCP_CORK:
2301 /* When set indicates to always queue non-full frames. 2347 /* When set indicates to always queue non-full frames.
2302 * Later the user clears this option and we transmit 2348 * Later the user clears this option and we transmit
@@ -2632,6 +2678,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2632 val = tp->thin_dupack; 2678 val = tp->thin_dupack;
2633 break; 2679 break;
2634 2680
2681 case TCP_REPAIR:
2682 val = tp->repair;
2683 break;
2684
2685 case TCP_REPAIR_QUEUE:
2686 if (tp->repair)
2687 val = tp->repair_queue;
2688 else
2689 return -EINVAL;
2690 break;
2691
2692 case TCP_QUEUE_SEQ:
2693 if (tp->repair_queue == TCP_SEND_QUEUE)
2694 val = tp->write_seq;
2695 else if (tp->repair_queue == TCP_RECV_QUEUE)
2696 val = tp->rcv_nxt;
2697 else
2698 return -EINVAL;
2699 break;
2700
2635 case TCP_USER_TIMEOUT: 2701 case TCP_USER_TIMEOUT:
2636 val = jiffies_to_msecs(icsk->icsk_user_timeout); 2702 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2637 break; 2703 break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0cb86ceb652f..ba6dad81908e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
138} 138}
139EXPORT_SYMBOL_GPL(tcp_twsk_unique); 139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140 140
/*
 * Connect path used when the socket is in repair mode.
 *
 * Initializes the connection state with tcp_connect_init() and then goes
 * straight to tcp_finish_connect() (passing NULL for the skb argument),
 * instead of calling tcp_connect(). Always returns 0.
 */
141static int tcp_repair_connect(struct sock *sk)
142{
143 tcp_connect_init(sk);
144 tcp_finish_connect(sk, NULL);
145
146 return 0;
147}
148
141/* This will initiate an outgoing connection. */ 149/* This will initiate an outgoing connection. */
142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143{ 151{
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
196 /* Reset inherited state */ 204 /* Reset inherited state */
197 tp->rx_opt.ts_recent = 0; 205 tp->rx_opt.ts_recent = 0;
198 tp->rx_opt.ts_recent_stamp = 0; 206 tp->rx_opt.ts_recent_stamp = 0;
199 tp->write_seq = 0; 207 if (likely(!tp->repair))
208 tp->write_seq = 0;
200 } 209 }
201 210
202 if (tcp_death_row.sysctl_tw_recycle && 211 if (tcp_death_row.sysctl_tw_recycle &&
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
247 sk->sk_gso_type = SKB_GSO_TCPV4; 256 sk->sk_gso_type = SKB_GSO_TCPV4;
248 sk_setup_caps(sk, &rt->dst); 257 sk_setup_caps(sk, &rt->dst);
249 258
250 if (!tp->write_seq) 259 if (!tp->write_seq && likely(!tp->repair))
251 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 260 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
252 inet->inet_daddr, 261 inet->inet_daddr,
253 inet->inet_sport, 262 inet->inet_sport,
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
255 264
256 inet->inet_id = tp->write_seq ^ jiffies; 265 inet->inet_id = tp->write_seq ^ jiffies;
257 266
258 err = tcp_connect(sk); 267 if (likely(!tp->repair))
268 err = tcp_connect(sk);
269 else
270 err = tcp_repair_connect(sk);
271
259 rt = NULL; 272 rt = NULL;
260 if (err) 273 if (err)
261 goto failure; 274 goto failure;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index db126a6954a2..fa442a61be6a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2617,9 +2617,11 @@ void tcp_connect_init(struct sock *sk)
2617 tp->snd_sml = tp->write_seq; 2617 tp->snd_sml = tp->write_seq;
2618 tp->snd_up = tp->write_seq; 2618 tp->snd_up = tp->write_seq;
2619 tp->snd_nxt = tp->write_seq; 2619 tp->snd_nxt = tp->write_seq;
2620 tp->rcv_nxt = 0; 2620
2621 tp->rcv_wup = 0; 2621 if (likely(!tp->repair))
2622 tp->copied_seq = 0; 2622 tp->rcv_nxt = 0;
2623 tp->rcv_wup = tp->rcv_nxt;
2624 tp->copied_seq = tp->rcv_nxt;
2623 2625
2624 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; 2626 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2625 inet_csk(sk)->icsk_retransmits = 0; 2627 inet_csk(sk)->icsk_retransmits = 0;
@@ -2790,6 +2792,14 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2790 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2792 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2791} 2793}
2792 2794
/*
 * Send a single window probe on an established socket.
 *
 * Does nothing unless sk->sk_state is TCP_ESTABLISHED. Called from the
 * TCP_REPAIR setsockopt path when repair mode is switched off.
 */
2795void tcp_send_window_probe(struct sock *sk)
2796{
2797 if (sk->sk_state == TCP_ESTABLISHED) {
	/*
	 * NOTE(review): snd_wl1 is moved to just before rcv_nxt, presumably so
	 * that the peer's reply to the probe is accepted as a window update;
	 * confirm against the window-update check in the ACK processing code.
	 */
2798 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
	/* Non-urgent probe (urgent == 0); the return value is not checked. */
2799 tcp_xmit_probe_skb(sk, 0);
2800 }
2801}
2802
2793/* Initiate keepalive or window probe from timer. */ 2803/* Initiate keepalive or window probe from timer. */
2794int tcp_write_wakeup(struct sock *sk) 2804int tcp_write_wakeup(struct sock *sk)
2795{ 2805{