diff options
-rw-r--r-- | include/linux/ipv6.h | 7 | ||||
-rw-r--r-- | include/net/inet_connection_sock.h | 152 | ||||
-rw-r--r-- | include/net/tcp.h | 160 | ||||
-rw-r--r-- | net/ipv4/Makefile | 2 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 401 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 93 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 210 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 65 |
10 files changed, 588 insertions, 531 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 88591913c94f..777339b68464 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h | |||
@@ -333,15 +333,10 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | |||
333 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | 333 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; |
334 | } | 334 | } |
335 | 335 | ||
336 | static inline int inet_twsk_ipv6only(const struct sock *sk) | ||
337 | { | ||
338 | return inet_twsk(sk)->tw_ipv6only; | ||
339 | } | ||
340 | |||
341 | static inline int inet_v6_ipv6only(const struct sock *sk) | 336 | static inline int inet_v6_ipv6only(const struct sock *sk) |
342 | { | 337 | { |
343 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | 338 | return likely(sk->sk_state != TCP_TIME_WAIT) ? |
344 | ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); | 339 | ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only; |
345 | } | 340 | } |
346 | #else | 341 | #else |
347 | #define __ipv6_only_sock(sk) 0 | 342 | #define __ipv6_only_sock(sk) 0 |
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ef609396e41b..97e002001c1a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h | |||
@@ -16,9 +16,15 @@ | |||
16 | #define _INET_CONNECTION_SOCK_H | 16 | #define _INET_CONNECTION_SOCK_H |
17 | 17 | ||
18 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
19 | #include <linux/string.h> | ||
19 | #include <linux/timer.h> | 20 | #include <linux/timer.h> |
20 | #include <net/request_sock.h> | 21 | #include <net/request_sock.h> |
21 | 22 | ||
23 | #define INET_CSK_DEBUG 1 | ||
24 | |||
25 | /* Cancel timers, when they are not required. */ | ||
26 | #undef INET_CSK_CLEAR_TIMERS | ||
27 | |||
22 | struct inet_bind_bucket; | 28 | struct inet_bind_bucket; |
23 | struct inet_hashinfo; | 29 | struct inet_hashinfo; |
24 | 30 | ||
@@ -61,17 +67,107 @@ struct inet_connection_sock { | |||
61 | } icsk_ack; | 67 | } icsk_ack; |
62 | }; | 68 | }; |
63 | 69 | ||
70 | #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ | ||
71 | #define ICSK_TIME_DACK 2 /* Delayed ack timer */ | ||
72 | #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ | ||
73 | #define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ | ||
74 | |||
64 | static inline struct inet_connection_sock *inet_csk(const struct sock *sk) | 75 | static inline struct inet_connection_sock *inet_csk(const struct sock *sk) |
65 | { | 76 | { |
66 | return (struct inet_connection_sock *)sk; | 77 | return (struct inet_connection_sock *)sk; |
67 | } | 78 | } |
68 | 79 | ||
80 | enum inet_csk_ack_state_t { | ||
81 | ICSK_ACK_SCHED = 1, | ||
82 | ICSK_ACK_TIMER = 2, | ||
83 | ICSK_ACK_PUSHED = 4 | ||
84 | }; | ||
85 | |||
69 | extern void inet_csk_init_xmit_timers(struct sock *sk, | 86 | extern void inet_csk_init_xmit_timers(struct sock *sk, |
70 | void (*retransmit_handler)(unsigned long), | 87 | void (*retransmit_handler)(unsigned long), |
71 | void (*delack_handler)(unsigned long), | 88 | void (*delack_handler)(unsigned long), |
72 | void (*keepalive_handler)(unsigned long)); | 89 | void (*keepalive_handler)(unsigned long)); |
73 | extern void inet_csk_clear_xmit_timers(struct sock *sk); | 90 | extern void inet_csk_clear_xmit_timers(struct sock *sk); |
74 | 91 | ||
92 | static inline void inet_csk_schedule_ack(struct sock *sk) | ||
93 | { | ||
94 | inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; | ||
95 | } | ||
96 | |||
97 | static inline int inet_csk_ack_scheduled(const struct sock *sk) | ||
98 | { | ||
99 | return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; | ||
100 | } | ||
101 | |||
102 | static inline void inet_csk_delack_init(struct sock *sk) | ||
103 | { | ||
104 | memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); | ||
105 | } | ||
106 | |||
107 | extern void inet_csk_delete_keepalive_timer(struct sock *sk); | ||
108 | extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); | ||
109 | |||
110 | #ifdef INET_CSK_DEBUG | ||
111 | extern const char inet_csk_timer_bug_msg[]; | ||
112 | #endif | ||
113 | |||
114 | static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) | ||
115 | { | ||
116 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
117 | |||
118 | if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { | ||
119 | icsk->icsk_pending = 0; | ||
120 | #ifdef INET_CSK_CLEAR_TIMERS | ||
121 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
122 | #endif | ||
123 | } else if (what == ICSK_TIME_DACK) { | ||
124 | icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; | ||
125 | #ifdef INET_CSK_CLEAR_TIMERS | ||
126 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
127 | #endif | ||
128 | } | ||
129 | #ifdef INET_CSK_DEBUG | ||
130 | else { | ||
131 | pr_debug(inet_csk_timer_bug_msg); | ||
132 | } | ||
133 | #endif | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * Reset the retransmission timer | ||
138 | */ | ||
139 | static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, | ||
140 | unsigned long when, | ||
141 | const unsigned long max_when) | ||
142 | { | ||
143 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
144 | |||
145 | if (when > max_when) { | ||
146 | #ifdef INET_CSK_DEBUG | ||
147 | pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", | ||
148 | sk, what, when, current_text_addr()); | ||
149 | #endif | ||
150 | when = max_when; | ||
151 | } | ||
152 | |||
153 | if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { | ||
154 | icsk->icsk_pending = what; | ||
155 | icsk->icsk_timeout = jiffies + when; | ||
156 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); | ||
157 | } else if (what == ICSK_TIME_DACK) { | ||
158 | icsk->icsk_ack.pending |= ICSK_ACK_TIMER; | ||
159 | icsk->icsk_ack.timeout = jiffies + when; | ||
160 | sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); | ||
161 | } | ||
162 | #ifdef INET_CSK_DEBUG | ||
163 | else { | ||
164 | pr_debug(inet_csk_timer_bug_msg); | ||
165 | } | ||
166 | #endif | ||
167 | } | ||
168 | |||
169 | extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); | ||
170 | |||
75 | extern struct request_sock *inet_csk_search_req(const struct sock *sk, | 171 | extern struct request_sock *inet_csk_search_req(const struct sock *sk, |
76 | struct request_sock ***prevp, | 172 | struct request_sock ***prevp, |
77 | const __u16 rport, | 173 | const __u16 rport, |
@@ -83,4 +179,60 @@ extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
83 | extern struct dst_entry* inet_csk_route_req(struct sock *sk, | 179 | extern struct dst_entry* inet_csk_route_req(struct sock *sk, |
84 | const struct request_sock *req); | 180 | const struct request_sock *req); |
85 | 181 | ||
182 | static inline void inet_csk_reqsk_queue_add(struct sock *sk, | ||
183 | struct request_sock *req, | ||
184 | struct sock *child) | ||
185 | { | ||
186 | reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); | ||
187 | } | ||
188 | |||
189 | extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, | ||
190 | struct request_sock *req, | ||
191 | const unsigned timeout); | ||
192 | |||
193 | static inline void inet_csk_reqsk_queue_removed(struct sock *sk, | ||
194 | struct request_sock *req) | ||
195 | { | ||
196 | if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) | ||
197 | inet_csk_delete_keepalive_timer(sk); | ||
198 | } | ||
199 | |||
200 | static inline void inet_csk_reqsk_queue_added(struct sock *sk, | ||
201 | const unsigned long timeout) | ||
202 | { | ||
203 | if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) | ||
204 | inet_csk_reset_keepalive_timer(sk, timeout); | ||
205 | } | ||
206 | |||
207 | static inline int inet_csk_reqsk_queue_len(const struct sock *sk) | ||
208 | { | ||
209 | return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); | ||
210 | } | ||
211 | |||
212 | static inline int inet_csk_reqsk_queue_young(const struct sock *sk) | ||
213 | { | ||
214 | return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); | ||
215 | } | ||
216 | |||
217 | static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) | ||
218 | { | ||
219 | return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); | ||
220 | } | ||
221 | |||
222 | static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, | ||
223 | struct request_sock *req, | ||
224 | struct request_sock **prev) | ||
225 | { | ||
226 | reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); | ||
227 | } | ||
228 | |||
229 | static inline void inet_csk_reqsk_queue_drop(struct sock *sk, | ||
230 | struct request_sock *req, | ||
231 | struct request_sock **prev) | ||
232 | { | ||
233 | inet_csk_reqsk_queue_unlink(sk, req, prev); | ||
234 | inet_csk_reqsk_queue_removed(sk, req); | ||
235 | reqsk_free(req); | ||
236 | } | ||
237 | |||
86 | #endif /* _INET_CONNECTION_SOCK_H */ | 238 | #endif /* _INET_CONNECTION_SOCK_H */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index a943c79c88b0..dd9a5a288f88 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -19,18 +19,16 @@ | |||
19 | #define _TCP_H | 19 | #define _TCP_H |
20 | 20 | ||
21 | #define TCP_DEBUG 1 | 21 | #define TCP_DEBUG 1 |
22 | #define INET_CSK_DEBUG 1 | ||
23 | #define FASTRETRANS_DEBUG 1 | 22 | #define FASTRETRANS_DEBUG 1 |
24 | 23 | ||
25 | /* Cancel timers, when they are not required. */ | ||
26 | #undef INET_CSK_CLEAR_TIMERS | ||
27 | |||
28 | #include <linux/config.h> | 24 | #include <linux/config.h> |
29 | #include <linux/list.h> | 25 | #include <linux/list.h> |
30 | #include <linux/tcp.h> | 26 | #include <linux/tcp.h> |
31 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
32 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
33 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | |||
31 | #include <net/inet_connection_sock.h> | ||
34 | #include <net/inet_hashtables.h> | 32 | #include <net/inet_hashtables.h> |
35 | #include <net/checksum.h> | 33 | #include <net/checksum.h> |
36 | #include <net/request_sock.h> | 34 | #include <net/request_sock.h> |
@@ -206,11 +204,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); | |||
206 | #define TCPOLEN_SACK_BASE_ALIGNED 4 | 204 | #define TCPOLEN_SACK_BASE_ALIGNED 4 |
207 | #define TCPOLEN_SACK_PERBLOCK 8 | 205 | #define TCPOLEN_SACK_PERBLOCK 8 |
208 | 206 | ||
209 | #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ | ||
210 | #define ICSK_TIME_DACK 2 /* Delayed ack timer */ | ||
211 | #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ | ||
212 | #define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ | ||
213 | |||
214 | /* Flags in tp->nonagle */ | 207 | /* Flags in tp->nonagle */ |
215 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ | 208 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ |
216 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ | 209 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ |
@@ -257,12 +250,6 @@ extern atomic_t tcp_memory_allocated; | |||
257 | extern atomic_t tcp_sockets_allocated; | 250 | extern atomic_t tcp_sockets_allocated; |
258 | extern int tcp_memory_pressure; | 251 | extern int tcp_memory_pressure; |
259 | 252 | ||
260 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
261 | #define AF_INET_FAMILY(fam) ((fam) == AF_INET) | ||
262 | #else | ||
263 | #define AF_INET_FAMILY(fam) 1 | ||
264 | #endif | ||
265 | |||
266 | /* | 253 | /* |
267 | * Pointers to address related TCP functions | 254 | * Pointers to address related TCP functions |
268 | * (i.e. things that depend on the address family) | 255 | * (i.e. things that depend on the address family) |
@@ -373,22 +360,6 @@ extern int tcp_rcv_established(struct sock *sk, | |||
373 | 360 | ||
374 | extern void tcp_rcv_space_adjust(struct sock *sk); | 361 | extern void tcp_rcv_space_adjust(struct sock *sk); |
375 | 362 | ||
376 | enum inet_csk_ack_state_t { | ||
377 | ICSK_ACK_SCHED = 1, | ||
378 | ICSK_ACK_TIMER = 2, | ||
379 | ICSK_ACK_PUSHED = 4 | ||
380 | }; | ||
381 | |||
382 | static inline void inet_csk_schedule_ack(struct sock *sk) | ||
383 | { | ||
384 | inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; | ||
385 | } | ||
386 | |||
387 | static inline int inet_csk_ack_scheduled(const struct sock *sk) | ||
388 | { | ||
389 | return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; | ||
390 | } | ||
391 | |||
392 | static inline void tcp_dec_quickack_mode(struct sock *sk, | 363 | static inline void tcp_dec_quickack_mode(struct sock *sk, |
393 | const unsigned int pkts) | 364 | const unsigned int pkts) |
394 | { | 365 | { |
@@ -406,11 +377,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, | |||
406 | 377 | ||
407 | extern void tcp_enter_quickack_mode(struct sock *sk); | 378 | extern void tcp_enter_quickack_mode(struct sock *sk); |
408 | 379 | ||
409 | static inline void inet_csk_delack_init(struct sock *sk) | ||
410 | { | ||
411 | memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); | ||
412 | } | ||
413 | |||
414 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) | 380 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) |
415 | { | 381 | { |
416 | rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; | 382 | rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; |
@@ -442,7 +408,6 @@ extern void tcp_update_metrics(struct sock *sk); | |||
442 | 408 | ||
443 | extern void tcp_close(struct sock *sk, | 409 | extern void tcp_close(struct sock *sk, |
444 | long timeout); | 410 | long timeout); |
445 | extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); | ||
446 | extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); | 411 | extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); |
447 | 412 | ||
448 | extern int tcp_getsockopt(struct sock *sk, int level, | 413 | extern int tcp_getsockopt(struct sock *sk, int level, |
@@ -541,15 +506,9 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) | |||
541 | inet_csk_clear_xmit_timers(sk); | 506 | inet_csk_clear_xmit_timers(sk); |
542 | } | 507 | } |
543 | 508 | ||
544 | extern void inet_csk_delete_keepalive_timer(struct sock *sk); | ||
545 | extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); | ||
546 | extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); | 509 | extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); |
547 | extern unsigned int tcp_current_mss(struct sock *sk, int large); | 510 | extern unsigned int tcp_current_mss(struct sock *sk, int large); |
548 | 511 | ||
549 | #ifdef INET_CSK_DEBUG | ||
550 | extern const char inet_csk_timer_bug_msg[]; | ||
551 | #endif | ||
552 | |||
553 | /* tcp_diag.c */ | 512 | /* tcp_diag.c */ |
554 | extern void tcp_get_info(struct sock *, struct tcp_info *); | 513 | extern void tcp_get_info(struct sock *, struct tcp_info *); |
555 | 514 | ||
@@ -559,60 +518,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, | |||
559 | extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | 518 | extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, |
560 | sk_read_actor_t recv_actor); | 519 | sk_read_actor_t recv_actor); |
561 | 520 | ||
562 | static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) | ||
563 | { | ||
564 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
565 | |||
566 | if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { | ||
567 | icsk->icsk_pending = 0; | ||
568 | #ifdef INET_CSK_CLEAR_TIMERS | ||
569 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
570 | #endif | ||
571 | } else if (what == ICSK_TIME_DACK) { | ||
572 | icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; | ||
573 | #ifdef INET_CSK_CLEAR_TIMERS | ||
574 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
575 | #endif | ||
576 | } | ||
577 | #ifdef INET_CSK_DEBUG | ||
578 | else { | ||
579 | pr_debug(inet_csk_timer_bug_msg); | ||
580 | } | ||
581 | #endif | ||
582 | } | ||
583 | |||
584 | /* | ||
585 | * Reset the retransmission timer | ||
586 | */ | ||
587 | static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, | ||
588 | unsigned long when) | ||
589 | { | ||
590 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
591 | |||
592 | if (when > TCP_RTO_MAX) { | ||
593 | #ifdef INET_CSK_DEBUG | ||
594 | pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", | ||
595 | sk, what, when, current_text_addr()); | ||
596 | #endif | ||
597 | when = TCP_RTO_MAX; | ||
598 | } | ||
599 | |||
600 | if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { | ||
601 | icsk->icsk_pending = what; | ||
602 | icsk->icsk_timeout = jiffies + when; | ||
603 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); | ||
604 | } else if (what == ICSK_TIME_DACK) { | ||
605 | icsk->icsk_ack.pending |= ICSK_ACK_TIMER; | ||
606 | icsk->icsk_ack.timeout = jiffies + when; | ||
607 | sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); | ||
608 | } | ||
609 | #ifdef INET_CSK_DEBUG | ||
610 | else { | ||
611 | pr_debug(inet_csk_timer_bug_msg); | ||
612 | } | ||
613 | #endif | ||
614 | } | ||
615 | |||
616 | /* Initialize RCV_MSS value. | 521 | /* Initialize RCV_MSS value. |
617 | * RCV_MSS is our guess about the MSS used by the peer. | 522 | * RCV_MSS is our guess about the MSS used by the peer. |
618 | * We haven't any direct information about the MSS. | 523 | * We haven't any direct information about the MSS. |
@@ -765,7 +670,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, | |||
765 | 670 | ||
766 | tp->packets_out += tcp_skb_pcount(skb); | 671 | tp->packets_out += tcp_skb_pcount(skb); |
767 | if (!orig) | 672 | if (!orig) |
768 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); | 673 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
674 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
769 | } | 675 | } |
770 | 676 | ||
771 | static inline void tcp_packets_out_dec(struct tcp_sock *tp, | 677 | static inline void tcp_packets_out_dec(struct tcp_sock *tp, |
@@ -934,7 +840,8 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t | |||
934 | { | 840 | { |
935 | const struct inet_connection_sock *icsk = inet_csk(sk); | 841 | const struct inet_connection_sock *icsk = inet_csk(sk); |
936 | if (!tp->packets_out && !icsk->icsk_pending) | 842 | if (!tp->packets_out && !icsk->icsk_pending) |
937 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); | 843 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
844 | icsk->icsk_rto, TCP_RTO_MAX); | ||
938 | } | 845 | } |
939 | 846 | ||
940 | static __inline__ void tcp_push_pending_frames(struct sock *sk, | 847 | static __inline__ void tcp_push_pending_frames(struct sock *sk, |
@@ -1017,7 +924,8 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) | |||
1017 | wake_up_interruptible(sk->sk_sleep); | 924 | wake_up_interruptible(sk->sk_sleep); |
1018 | if (!inet_csk_ack_scheduled(sk)) | 925 | if (!inet_csk_ack_scheduled(sk)) |
1019 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 926 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
1020 | (3 * TCP_RTO_MIN) / 4); | 927 | (3 * TCP_RTO_MIN) / 4, |
928 | TCP_RTO_MAX); | ||
1021 | } | 929 | } |
1022 | return 1; | 930 | return 1; |
1023 | } | 931 | } |
@@ -1181,58 +1089,6 @@ static inline int tcp_full_space(const struct sock *sk) | |||
1181 | return tcp_win_from_space(sk->sk_rcvbuf); | 1089 | return tcp_win_from_space(sk->sk_rcvbuf); |
1182 | } | 1090 | } |
1183 | 1091 | ||
1184 | static inline void inet_csk_reqsk_queue_add(struct sock *sk, | ||
1185 | struct request_sock *req, | ||
1186 | struct sock *child) | ||
1187 | { | ||
1188 | reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); | ||
1189 | } | ||
1190 | |||
1191 | static inline void inet_csk_reqsk_queue_removed(struct sock *sk, | ||
1192 | struct request_sock *req) | ||
1193 | { | ||
1194 | if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) | ||
1195 | inet_csk_delete_keepalive_timer(sk); | ||
1196 | } | ||
1197 | |||
1198 | static inline void inet_csk_reqsk_queue_added(struct sock *sk, | ||
1199 | const unsigned long timeout) | ||
1200 | { | ||
1201 | if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) | ||
1202 | inet_csk_reset_keepalive_timer(sk, timeout); | ||
1203 | } | ||
1204 | |||
1205 | static inline int inet_csk_reqsk_queue_len(const struct sock *sk) | ||
1206 | { | ||
1207 | return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); | ||
1208 | } | ||
1209 | |||
1210 | static inline int inet_csk_reqsk_queue_young(const struct sock *sk) | ||
1211 | { | ||
1212 | return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); | ||
1213 | } | ||
1214 | |||
1215 | static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) | ||
1216 | { | ||
1217 | return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); | ||
1218 | } | ||
1219 | |||
1220 | static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, | ||
1221 | struct request_sock *req, | ||
1222 | struct request_sock **prev) | ||
1223 | { | ||
1224 | reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); | ||
1225 | } | ||
1226 | |||
1227 | static inline void inet_csk_reqsk_queue_drop(struct sock *sk, | ||
1228 | struct request_sock *req, | ||
1229 | struct request_sock **prev) | ||
1230 | { | ||
1231 | inet_csk_reqsk_queue_unlink(sk, req, prev); | ||
1232 | inet_csk_reqsk_queue_removed(sk, req); | ||
1233 | reqsk_free(req); | ||
1234 | } | ||
1235 | |||
1236 | static __inline__ void tcp_openreq_init(struct request_sock *req, | 1092 | static __inline__ void tcp_openreq_init(struct request_sock *req, |
1237 | struct tcp_options_received *rx_opt, | 1093 | struct tcp_options_received *rx_opt, |
1238 | struct sk_buff *skb) | 1094 | struct sk_buff *skb) |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 6650d18e400f..ea0e1d87dc7e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -5,7 +5,7 @@ | |||
5 | obj-y := route.o inetpeer.o protocol.o \ | 5 | obj-y := route.o inetpeer.o protocol.o \ |
6 | ip_input.o ip_fragment.o ip_forward.o ip_options.o \ | 6 | ip_input.o ip_fragment.o ip_forward.o ip_options.o \ |
7 | ip_output.o ip_sockglue.o inet_hashtables.o \ | 7 | ip_output.o ip_sockglue.o inet_hashtables.o \ |
8 | inet_timewait_sock.o \ | 8 | inet_timewait_sock.o inet_connection_sock.o \ |
9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ | 9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ |
10 | tcp_minisocks.o tcp_cong.o \ | 10 | tcp_minisocks.o tcp_cong.o \ |
11 | datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ | 11 | datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c new file mode 100644 index 000000000000..2712400a8bb8 --- /dev/null +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -0,0 +1,401 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Support for INET connection oriented protocols. | ||
7 | * | ||
8 | * Authors: See the TCP sources | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/jhash.h> | ||
19 | |||
20 | #include <net/inet_connection_sock.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/inet_timewait_sock.h> | ||
23 | #include <net/ip.h> | ||
24 | #include <net/route.h> | ||
25 | #include <net/tcp_states.h> | ||
26 | |||
27 | #ifdef INET_CSK_DEBUG | ||
28 | const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; | ||
29 | EXPORT_SYMBOL(inet_csk_timer_bug_msg); | ||
30 | #endif | ||
31 | |||
32 | /* | ||
33 | * This array holds the first and last local port number. | ||
34 | * For high-usage systems, use sysctl to change this to | ||
35 | * 32768-61000 | ||
36 | */ | ||
37 | int sysctl_local_port_range[2] = { 1024, 4999 }; | ||
38 | |||
39 | static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) | ||
40 | { | ||
41 | const u32 sk_rcv_saddr = inet_rcv_saddr(sk); | ||
42 | struct sock *sk2; | ||
43 | struct hlist_node *node; | ||
44 | int reuse = sk->sk_reuse; | ||
45 | |||
46 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
47 | if (sk != sk2 && | ||
48 | !inet_v6_ipv6only(sk2) && | ||
49 | (!sk->sk_bound_dev_if || | ||
50 | !sk2->sk_bound_dev_if || | ||
51 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | ||
52 | if (!reuse || !sk2->sk_reuse || | ||
53 | sk2->sk_state == TCP_LISTEN) { | ||
54 | const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | ||
55 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | ||
56 | sk2_rcv_saddr == sk_rcv_saddr) | ||
57 | break; | ||
58 | } | ||
59 | } | ||
60 | } | ||
61 | return node != NULL; | ||
62 | } | ||
63 | |||
64 | /* Obtain a reference to a local port for the given sock, | ||
65 | * if snum is zero it means select any available local port. | ||
66 | */ | ||
67 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, | ||
68 | struct sock *sk, unsigned short snum) | ||
69 | { | ||
70 | struct inet_bind_hashbucket *head; | ||
71 | struct hlist_node *node; | ||
72 | struct inet_bind_bucket *tb; | ||
73 | int ret; | ||
74 | |||
75 | local_bh_disable(); | ||
76 | if (!snum) { | ||
77 | int low = sysctl_local_port_range[0]; | ||
78 | int high = sysctl_local_port_range[1]; | ||
79 | int remaining = (high - low) + 1; | ||
80 | int rover; | ||
81 | |||
82 | spin_lock(&hashinfo->portalloc_lock); | ||
83 | if (hashinfo->port_rover < low) | ||
84 | rover = low; | ||
85 | else | ||
86 | rover = hashinfo->port_rover; | ||
87 | do { | ||
88 | rover++; | ||
89 | if (rover > high) | ||
90 | rover = low; | ||
91 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | ||
92 | spin_lock(&head->lock); | ||
93 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
94 | if (tb->port == rover) | ||
95 | goto next; | ||
96 | break; | ||
97 | next: | ||
98 | spin_unlock(&head->lock); | ||
99 | } while (--remaining > 0); | ||
100 | hashinfo->port_rover = rover; | ||
101 | spin_unlock(&hashinfo->portalloc_lock); | ||
102 | |||
103 | /* Exhausted local port range during search? It is not | ||
104 | * possible for us to be holding one of the bind hash | ||
105 | * locks if this test triggers, because if 'remaining' | ||
106 | * drops to zero, we broke out of the do/while loop at | ||
107 | * the top level, not from the 'break;' statement. | ||
108 | */ | ||
109 | ret = 1; | ||
110 | if (remaining <= 0) | ||
111 | goto fail; | ||
112 | |||
113 | /* OK, here is the one we will use. HEAD is | ||
114 | * non-NULL and we hold its lock. | ||
115 | */ | ||
116 | snum = rover; | ||
117 | } else { | ||
118 | head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; | ||
119 | spin_lock(&head->lock); | ||
120 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
121 | if (tb->port == snum) | ||
122 | goto tb_found; | ||
123 | } | ||
124 | tb = NULL; | ||
125 | goto tb_not_found; | ||
126 | tb_found: | ||
127 | if (!hlist_empty(&tb->owners)) { | ||
128 | if (sk->sk_reuse > 1) | ||
129 | goto success; | ||
130 | if (tb->fastreuse > 0 && | ||
131 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | ||
132 | goto success; | ||
133 | } else { | ||
134 | ret = 1; | ||
135 | if (inet_csk_bind_conflict(sk, tb)) | ||
136 | goto fail_unlock; | ||
137 | } | ||
138 | } | ||
139 | tb_not_found: | ||
140 | ret = 1; | ||
141 | if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) | ||
142 | goto fail_unlock; | ||
143 | if (hlist_empty(&tb->owners)) { | ||
144 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
145 | tb->fastreuse = 1; | ||
146 | else | ||
147 | tb->fastreuse = 0; | ||
148 | } else if (tb->fastreuse && | ||
149 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
150 | tb->fastreuse = 0; | ||
151 | success: | ||
152 | if (!inet_csk(sk)->icsk_bind_hash) | ||
153 | inet_bind_hash(sk, tb, snum); | ||
154 | BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); | ||
155 | ret = 0; | ||
156 | |||
157 | fail_unlock: | ||
158 | spin_unlock(&head->lock); | ||
159 | fail: | ||
160 | local_bh_enable(); | ||
161 | return ret; | ||
162 | } | ||
163 | |||
164 | EXPORT_SYMBOL_GPL(inet_csk_get_port); | ||
165 | |||
166 | /* | ||
167 | * Wait for an incoming connection, avoid race conditions. This must be called | ||
168 | * with the socket locked. | ||
169 | */ | ||
170 | static int inet_csk_wait_for_connect(struct sock *sk, long timeo) | ||
171 | { | ||
172 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
173 | DEFINE_WAIT(wait); | ||
174 | int err; | ||
175 | |||
176 | /* | ||
177 | * True wake-one mechanism for incoming connections: only | ||
178 | * one process gets woken up, not the 'whole herd'. | ||
179 | * Since we do not 'race & poll' for established sockets | ||
180 | * anymore, the common case will execute the loop only once. | ||
181 | * | ||
182 | * Subtle issue: "add_wait_queue_exclusive()" will be added | ||
183 | * after any current non-exclusive waiters, and we know that | ||
184 | * it will always _stay_ after any new non-exclusive waiters | ||
185 | * because all non-exclusive waiters are added at the | ||
186 | * beginning of the wait-queue. As such, it's ok to "drop" | ||
187 | * our exclusiveness temporarily when we get woken up without | ||
188 | * having to remove and re-insert us on the wait queue. | ||
189 | */ | ||
190 | for (;;) { | ||
191 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | ||
192 | TASK_INTERRUPTIBLE); | ||
193 | release_sock(sk); | ||
194 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
195 | timeo = schedule_timeout(timeo); | ||
196 | lock_sock(sk); | ||
197 | err = 0; | ||
198 | if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
199 | break; | ||
200 | err = -EINVAL; | ||
201 | if (sk->sk_state != TCP_LISTEN) | ||
202 | break; | ||
203 | err = sock_intr_errno(timeo); | ||
204 | if (signal_pending(current)) | ||
205 | break; | ||
206 | err = -EAGAIN; | ||
207 | if (!timeo) | ||
208 | break; | ||
209 | } | ||
210 | finish_wait(sk->sk_sleep, &wait); | ||
211 | return err; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * This will accept the next outstanding connection. | ||
216 | */ | ||
217 | struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | ||
218 | { | ||
219 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
220 | struct sock *newsk; | ||
221 | int error; | ||
222 | |||
223 | lock_sock(sk); | ||
224 | |||
225 | /* We need to make sure that this socket is listening, | ||
226 | * and that it has something pending. | ||
227 | */ | ||
228 | error = -EINVAL; | ||
229 | if (sk->sk_state != TCP_LISTEN) | ||
230 | goto out_err; | ||
231 | |||
232 | /* Find already established connection */ | ||
233 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { | ||
234 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | ||
235 | |||
236 | /* If this is a non blocking socket don't sleep */ | ||
237 | error = -EAGAIN; | ||
238 | if (!timeo) | ||
239 | goto out_err; | ||
240 | |||
241 | error = inet_csk_wait_for_connect(sk, timeo); | ||
242 | if (error) | ||
243 | goto out_err; | ||
244 | } | ||
245 | |||
246 | newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); | ||
247 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | ||
248 | out: | ||
249 | release_sock(sk); | ||
250 | return newsk; | ||
251 | out_err: | ||
252 | newsk = NULL; | ||
253 | *err = error; | ||
254 | goto out; | ||
255 | } | ||
256 | |||
257 | EXPORT_SYMBOL(inet_csk_accept); | ||
258 | |||
259 | /* | ||
260 | * Using different timers for retransmit, delayed acks and probes | ||
261 | * We may wish use just one timer maintaining a list of expire jiffies | ||
262 | * to optimize. | ||
263 | */ | ||
264 | void inet_csk_init_xmit_timers(struct sock *sk, | ||
265 | void (*retransmit_handler)(unsigned long), | ||
266 | void (*delack_handler)(unsigned long), | ||
267 | void (*keepalive_handler)(unsigned long)) | ||
268 | { | ||
269 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
270 | |||
271 | init_timer(&icsk->icsk_retransmit_timer); | ||
272 | init_timer(&icsk->icsk_delack_timer); | ||
273 | init_timer(&sk->sk_timer); | ||
274 | |||
275 | icsk->icsk_retransmit_timer.function = retransmit_handler; | ||
276 | icsk->icsk_delack_timer.function = delack_handler; | ||
277 | sk->sk_timer.function = keepalive_handler; | ||
278 | |||
279 | icsk->icsk_retransmit_timer.data = | ||
280 | icsk->icsk_delack_timer.data = | ||
281 | sk->sk_timer.data = (unsigned long)sk; | ||
282 | |||
283 | icsk->icsk_pending = icsk->icsk_ack.pending = 0; | ||
284 | } | ||
285 | |||
286 | EXPORT_SYMBOL(inet_csk_init_xmit_timers); | ||
287 | |||
288 | void inet_csk_clear_xmit_timers(struct sock *sk) | ||
289 | { | ||
290 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
291 | |||
292 | icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; | ||
293 | |||
294 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
295 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
296 | sk_stop_timer(sk, &sk->sk_timer); | ||
297 | } | ||
298 | |||
299 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); | ||
300 | |||
301 | void inet_csk_delete_keepalive_timer(struct sock *sk) | ||
302 | { | ||
303 | sk_stop_timer(sk, &sk->sk_timer); | ||
304 | } | ||
305 | |||
306 | EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); | ||
307 | |||
308 | void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) | ||
309 | { | ||
310 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); | ||
311 | } | ||
312 | |||
313 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | ||
314 | |||
315 | struct dst_entry* inet_csk_route_req(struct sock *sk, | ||
316 | const struct request_sock *req) | ||
317 | { | ||
318 | struct rtable *rt; | ||
319 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
320 | struct ip_options *opt = inet_rsk(req)->opt; | ||
321 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
322 | .nl_u = { .ip4_u = | ||
323 | { .daddr = ((opt && opt->srr) ? | ||
324 | opt->faddr : | ||
325 | ireq->rmt_addr), | ||
326 | .saddr = ireq->loc_addr, | ||
327 | .tos = RT_CONN_FLAGS(sk) } }, | ||
328 | .proto = sk->sk_protocol, | ||
329 | .uli_u = { .ports = | ||
330 | { .sport = inet_sk(sk)->sport, | ||
331 | .dport = ireq->rmt_port } } }; | ||
332 | |||
333 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
334 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
335 | return NULL; | ||
336 | } | ||
337 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { | ||
338 | ip_rt_put(rt); | ||
339 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
340 | return NULL; | ||
341 | } | ||
342 | return &rt->u.dst; | ||
343 | } | ||
344 | |||
345 | EXPORT_SYMBOL_GPL(inet_csk_route_req); | ||
346 | |||
347 | static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, | ||
348 | const u32 rnd, const u16 synq_hsize) | ||
349 | { | ||
350 | return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); | ||
351 | } | ||
352 | |||
353 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
354 | #define AF_INET_FAMILY(fam) ((fam) == AF_INET) | ||
355 | #else | ||
356 | #define AF_INET_FAMILY(fam) 1 | ||
357 | #endif | ||
358 | |||
359 | struct request_sock *inet_csk_search_req(const struct sock *sk, | ||
360 | struct request_sock ***prevp, | ||
361 | const __u16 rport, const __u32 raddr, | ||
362 | const __u32 laddr) | ||
363 | { | ||
364 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
365 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
366 | struct request_sock *req, **prev; | ||
367 | |||
368 | for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, | ||
369 | lopt->nr_table_entries)]; | ||
370 | (req = *prev) != NULL; | ||
371 | prev = &req->dl_next) { | ||
372 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
373 | |||
374 | if (ireq->rmt_port == rport && | ||
375 | ireq->rmt_addr == raddr && | ||
376 | ireq->loc_addr == laddr && | ||
377 | AF_INET_FAMILY(req->rsk_ops->family)) { | ||
378 | BUG_TRAP(!req->sk); | ||
379 | *prevp = prev; | ||
380 | break; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | return req; | ||
385 | } | ||
386 | |||
387 | EXPORT_SYMBOL_GPL(inet_csk_search_req); | ||
388 | |||
389 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | ||
390 | const unsigned timeout) | ||
391 | { | ||
392 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
393 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
394 | const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, | ||
395 | lopt->hash_rnd, lopt->nr_table_entries); | ||
396 | |||
397 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); | ||
398 | inet_csk_reqsk_queue_added(sk, timeout); | ||
399 | } | ||
400 | |||
401 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8177b86570db..581016a6a93f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1805,98 +1805,6 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
1805 | } | 1805 | } |
1806 | 1806 | ||
1807 | /* | 1807 | /* |
1808 | * Wait for an incoming connection, avoid race | ||
1809 | * conditions. This must be called with the socket locked. | ||
1810 | */ | ||
1811 | static int wait_for_connect(struct sock *sk, long timeo) | ||
1812 | { | ||
1813 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1814 | DEFINE_WAIT(wait); | ||
1815 | int err; | ||
1816 | |||
1817 | /* | ||
1818 | * True wake-one mechanism for incoming connections: only | ||
1819 | * one process gets woken up, not the 'whole herd'. | ||
1820 | * Since we do not 'race & poll' for established sockets | ||
1821 | * anymore, the common case will execute the loop only once. | ||
1822 | * | ||
1823 | * Subtle issue: "add_wait_queue_exclusive()" will be added | ||
1824 | * after any current non-exclusive waiters, and we know that | ||
1825 | * it will always _stay_ after any new non-exclusive waiters | ||
1826 | * because all non-exclusive waiters are added at the | ||
1827 | * beginning of the wait-queue. As such, it's ok to "drop" | ||
1828 | * our exclusiveness temporarily when we get woken up without | ||
1829 | * having to remove and re-insert us on the wait queue. | ||
1830 | */ | ||
1831 | for (;;) { | ||
1832 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | ||
1833 | TASK_INTERRUPTIBLE); | ||
1834 | release_sock(sk); | ||
1835 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
1836 | timeo = schedule_timeout(timeo); | ||
1837 | lock_sock(sk); | ||
1838 | err = 0; | ||
1839 | if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
1840 | break; | ||
1841 | err = -EINVAL; | ||
1842 | if (sk->sk_state != TCP_LISTEN) | ||
1843 | break; | ||
1844 | err = sock_intr_errno(timeo); | ||
1845 | if (signal_pending(current)) | ||
1846 | break; | ||
1847 | err = -EAGAIN; | ||
1848 | if (!timeo) | ||
1849 | break; | ||
1850 | } | ||
1851 | finish_wait(sk->sk_sleep, &wait); | ||
1852 | return err; | ||
1853 | } | ||
1854 | |||
1855 | /* | ||
1856 | * This will accept the next outstanding connection. | ||
1857 | */ | ||
1858 | |||
1859 | struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | ||
1860 | { | ||
1861 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1862 | struct sock *newsk; | ||
1863 | int error; | ||
1864 | |||
1865 | lock_sock(sk); | ||
1866 | |||
1867 | /* We need to make sure that this socket is listening, | ||
1868 | * and that it has something pending. | ||
1869 | */ | ||
1870 | error = -EINVAL; | ||
1871 | if (sk->sk_state != TCP_LISTEN) | ||
1872 | goto out_err; | ||
1873 | |||
1874 | /* Find already established connection */ | ||
1875 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { | ||
1876 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | ||
1877 | |||
1878 | /* If this is a non blocking socket don't sleep */ | ||
1879 | error = -EAGAIN; | ||
1880 | if (!timeo) | ||
1881 | goto out_err; | ||
1882 | |||
1883 | error = wait_for_connect(sk, timeo); | ||
1884 | if (error) | ||
1885 | goto out_err; | ||
1886 | } | ||
1887 | |||
1888 | newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); | ||
1889 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | ||
1890 | out: | ||
1891 | release_sock(sk); | ||
1892 | return newsk; | ||
1893 | out_err: | ||
1894 | newsk = NULL; | ||
1895 | *err = error; | ||
1896 | goto out; | ||
1897 | } | ||
1898 | |||
1899 | /* | ||
1900 | * Socket option code for TCP. | 1808 | * Socket option code for TCP. |
1901 | */ | 1809 | */ |
1902 | int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | 1810 | int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, |
@@ -2344,7 +2252,6 @@ void __init tcp_init(void) | |||
2344 | tcp_register_congestion_control(&tcp_reno); | 2252 | tcp_register_congestion_control(&tcp_reno); |
2345 | } | 2253 | } |
2346 | 2254 | ||
2347 | EXPORT_SYMBOL(inet_csk_accept); | ||
2348 | EXPORT_SYMBOL(tcp_close); | 2255 | EXPORT_SYMBOL(tcp_close); |
2349 | EXPORT_SYMBOL(tcp_destroy_sock); | 2256 | EXPORT_SYMBOL(tcp_destroy_sock); |
2350 | EXPORT_SYMBOL(tcp_disconnect); | 2257 | EXPORT_SYMBOL(tcp_disconnect); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a8c5c2d90cb..b35badf53aa5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -1278,7 +1278,7 @@ static int tcp_check_sack_reneging(struct sock *sk) | |||
1278 | inet_csk(sk)->icsk_retransmits++; | 1278 | inet_csk(sk)->icsk_retransmits++; |
1279 | tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); | 1279 | tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); |
1280 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 1280 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1281 | inet_csk(sk)->icsk_rto); | 1281 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
1282 | return 1; | 1282 | return 1; |
1283 | } | 1283 | } |
1284 | return 0; | 1284 | return 0; |
@@ -1961,7 +1961,7 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) | |||
1961 | if (!tp->packets_out) { | 1961 | if (!tp->packets_out) { |
1962 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 1962 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
1963 | } else { | 1963 | } else { |
1964 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); | 1964 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
1965 | } | 1965 | } |
1966 | } | 1966 | } |
1967 | 1967 | ||
@@ -2147,7 +2147,8 @@ static void tcp_ack_probe(struct sock *sk) | |||
2147 | */ | 2147 | */ |
2148 | } else { | 2148 | } else { |
2149 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 2149 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
2150 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); | 2150 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), |
2151 | TCP_RTO_MAX); | ||
2151 | } | 2152 | } |
2152 | } | 2153 | } |
2153 | 2154 | ||
@@ -3968,7 +3969,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3968 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; | 3969 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; |
3969 | tcp_incr_quickack(sk); | 3970 | tcp_incr_quickack(sk); |
3970 | tcp_enter_quickack_mode(sk); | 3971 | tcp_enter_quickack_mode(sk); |
3971 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); | 3972 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
3973 | TCP_DELACK_MAX, TCP_RTO_MAX); | ||
3972 | 3974 | ||
3973 | discard: | 3975 | discard: |
3974 | __kfree_skb(skb); | 3976 | __kfree_skb(skb); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2cd41265d17f..2f605b9e6b67 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -97,138 +97,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | |||
97 | .port_rover = 1024 - 1, | 97 | .port_rover = 1024 - 1, |
98 | }; | 98 | }; |
99 | 99 | ||
100 | /* | ||
101 | * This array holds the first and last local port number. | ||
102 | * For high-usage systems, use sysctl to change this to | ||
103 | * 32768-61000 | ||
104 | */ | ||
105 | int sysctl_local_port_range[2] = { 1024, 4999 }; | ||
106 | |||
107 | static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) | ||
108 | { | ||
109 | const u32 sk_rcv_saddr = inet_rcv_saddr(sk); | ||
110 | struct sock *sk2; | ||
111 | struct hlist_node *node; | ||
112 | int reuse = sk->sk_reuse; | ||
113 | |||
114 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
115 | if (sk != sk2 && | ||
116 | !inet_v6_ipv6only(sk2) && | ||
117 | (!sk->sk_bound_dev_if || | ||
118 | !sk2->sk_bound_dev_if || | ||
119 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | ||
120 | if (!reuse || !sk2->sk_reuse || | ||
121 | sk2->sk_state == TCP_LISTEN) { | ||
122 | const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | ||
123 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | ||
124 | sk2_rcv_saddr == sk_rcv_saddr) | ||
125 | break; | ||
126 | } | ||
127 | } | ||
128 | } | ||
129 | return node != NULL; | ||
130 | } | ||
131 | |||
132 | /* Obtain a reference to a local port for the given sock, | ||
133 | * if snum is zero it means select any available local port. | ||
134 | */ | ||
135 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, | ||
136 | struct sock *sk, unsigned short snum) | ||
137 | { | ||
138 | struct inet_bind_hashbucket *head; | ||
139 | struct hlist_node *node; | ||
140 | struct inet_bind_bucket *tb; | ||
141 | int ret; | ||
142 | |||
143 | local_bh_disable(); | ||
144 | if (!snum) { | ||
145 | int low = sysctl_local_port_range[0]; | ||
146 | int high = sysctl_local_port_range[1]; | ||
147 | int remaining = (high - low) + 1; | ||
148 | int rover; | ||
149 | |||
150 | spin_lock(&hashinfo->portalloc_lock); | ||
151 | if (hashinfo->port_rover < low) | ||
152 | rover = low; | ||
153 | else | ||
154 | rover = hashinfo->port_rover; | ||
155 | do { | ||
156 | rover++; | ||
157 | if (rover > high) | ||
158 | rover = low; | ||
159 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | ||
160 | spin_lock(&head->lock); | ||
161 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
162 | if (tb->port == rover) | ||
163 | goto next; | ||
164 | break; | ||
165 | next: | ||
166 | spin_unlock(&head->lock); | ||
167 | } while (--remaining > 0); | ||
168 | hashinfo->port_rover = rover; | ||
169 | spin_unlock(&hashinfo->portalloc_lock); | ||
170 | |||
171 | /* Exhausted local port range during search? It is not | ||
172 | * possible for us to be holding one of the bind hash | ||
173 | * locks if this test triggers, because if 'remaining' | ||
174 | * drops to zero, we broke out of the do/while loop at | ||
175 | * the top level, not from the 'break;' statement. | ||
176 | */ | ||
177 | ret = 1; | ||
178 | if (unlikely(remaining <= 0)) | ||
179 | goto fail; | ||
180 | |||
181 | /* OK, here is the one we will use. HEAD is | ||
182 | * non-NULL and we hold it's mutex. | ||
183 | */ | ||
184 | snum = rover; | ||
185 | } else { | ||
186 | head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; | ||
187 | spin_lock(&head->lock); | ||
188 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
189 | if (tb->port == snum) | ||
190 | goto tb_found; | ||
191 | } | ||
192 | tb = NULL; | ||
193 | goto tb_not_found; | ||
194 | tb_found: | ||
195 | if (!hlist_empty(&tb->owners)) { | ||
196 | if (sk->sk_reuse > 1) | ||
197 | goto success; | ||
198 | if (tb->fastreuse > 0 && | ||
199 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | ||
200 | goto success; | ||
201 | } else { | ||
202 | ret = 1; | ||
203 | if (inet_csk_bind_conflict(sk, tb)) | ||
204 | goto fail_unlock; | ||
205 | } | ||
206 | } | ||
207 | tb_not_found: | ||
208 | ret = 1; | ||
209 | if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) | ||
210 | goto fail_unlock; | ||
211 | if (hlist_empty(&tb->owners)) { | ||
212 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
213 | tb->fastreuse = 1; | ||
214 | else | ||
215 | tb->fastreuse = 0; | ||
216 | } else if (tb->fastreuse && | ||
217 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
218 | tb->fastreuse = 0; | ||
219 | success: | ||
220 | if (!inet_csk(sk)->icsk_bind_hash) | ||
221 | inet_bind_hash(sk, tb, snum); | ||
222 | BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); | ||
223 | ret = 0; | ||
224 | |||
225 | fail_unlock: | ||
226 | spin_unlock(&head->lock); | ||
227 | fail: | ||
228 | local_bh_enable(); | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 100 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) |
233 | { | 101 | { |
234 | return inet_csk_get_port(&tcp_hashinfo, sk, snum); | 102 | return inet_csk_get_port(&tcp_hashinfo, sk, snum); |
@@ -568,52 +436,6 @@ static inline int inet_iif(const struct sk_buff *skb) | |||
568 | return ((struct rtable *)skb->dst)->rt_iif; | 436 | return ((struct rtable *)skb->dst)->rt_iif; |
569 | } | 437 | } |
570 | 438 | ||
571 | static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, | ||
572 | const u32 rnd, const u16 synq_hsize) | ||
573 | { | ||
574 | return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); | ||
575 | } | ||
576 | |||
577 | struct request_sock *inet_csk_search_req(const struct sock *sk, | ||
578 | struct request_sock ***prevp, | ||
579 | const __u16 rport, const __u32 raddr, | ||
580 | const __u32 laddr) | ||
581 | { | ||
582 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
583 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
584 | struct request_sock *req, **prev; | ||
585 | |||
586 | for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, | ||
587 | lopt->nr_table_entries)]; | ||
588 | (req = *prev) != NULL; | ||
589 | prev = &req->dl_next) { | ||
590 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
591 | |||
592 | if (ireq->rmt_port == rport && | ||
593 | ireq->rmt_addr == raddr && | ||
594 | ireq->loc_addr == laddr && | ||
595 | AF_INET_FAMILY(req->rsk_ops->family)) { | ||
596 | BUG_TRAP(!req->sk); | ||
597 | *prevp = prev; | ||
598 | break; | ||
599 | } | ||
600 | } | ||
601 | |||
602 | return req; | ||
603 | } | ||
604 | |||
605 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) | ||
606 | { | ||
607 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
608 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
609 | const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, | ||
610 | lopt->hash_rnd, lopt->nr_table_entries); | ||
611 | |||
612 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); | ||
613 | inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); | ||
614 | } | ||
615 | |||
616 | |||
617 | /* | 439 | /* |
618 | * This routine does path mtu discovery as defined in RFC1191. | 440 | * This routine does path mtu discovery as defined in RFC1191. |
619 | */ | 441 | */ |
@@ -963,36 +785,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | |||
963 | req->ts_recent); | 785 | req->ts_recent); |
964 | } | 786 | } |
965 | 787 | ||
966 | struct dst_entry* inet_csk_route_req(struct sock *sk, | ||
967 | const struct request_sock *req) | ||
968 | { | ||
969 | struct rtable *rt; | ||
970 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
971 | struct ip_options *opt = inet_rsk(req)->opt; | ||
972 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
973 | .nl_u = { .ip4_u = | ||
974 | { .daddr = ((opt && opt->srr) ? | ||
975 | opt->faddr : | ||
976 | ireq->rmt_addr), | ||
977 | .saddr = ireq->loc_addr, | ||
978 | .tos = RT_CONN_FLAGS(sk) } }, | ||
979 | .proto = sk->sk_protocol, | ||
980 | .uli_u = { .ports = | ||
981 | { .sport = inet_sk(sk)->sport, | ||
982 | .dport = ireq->rmt_port } } }; | ||
983 | |||
984 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
985 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
986 | return NULL; | ||
987 | } | ||
988 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { | ||
989 | ip_rt_put(rt); | ||
990 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
991 | return NULL; | ||
992 | } | ||
993 | return &rt->u.dst; | ||
994 | } | ||
995 | |||
996 | /* | 788 | /* |
997 | * Send a SYN-ACK after having received an ACK. | 789 | * Send a SYN-ACK after having received an ACK. |
998 | * This still operates on a request_sock only, not on a big | 790 | * This still operates on a request_sock only, not on a big |
@@ -1222,7 +1014,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1222 | if (want_cookie) { | 1014 | if (want_cookie) { |
1223 | reqsk_free(req); | 1015 | reqsk_free(req); |
1224 | } else { | 1016 | } else { |
1225 | tcp_v4_synq_add(sk, req); | 1017 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1226 | } | 1018 | } |
1227 | return 0; | 1019 | return 0; |
1228 | 1020 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6f0a7e30ceac..f458eacb5ef2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1493,7 +1493,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1493 | if (skb == | 1493 | if (skb == |
1494 | skb_peek(&sk->sk_write_queue)) | 1494 | skb_peek(&sk->sk_write_queue)) |
1495 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 1495 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1496 | inet_csk(sk)->icsk_rto); | 1496 | inet_csk(sk)->icsk_rto, |
1497 | TCP_RTO_MAX); | ||
1497 | } | 1498 | } |
1498 | 1499 | ||
1499 | packet_cnt -= tcp_skb_pcount(skb); | 1500 | packet_cnt -= tcp_skb_pcount(skb); |
@@ -1546,7 +1547,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1546 | break; | 1547 | break; |
1547 | 1548 | ||
1548 | if (skb == skb_peek(&sk->sk_write_queue)) | 1549 | if (skb == skb_peek(&sk->sk_write_queue)) |
1549 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); | 1550 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1551 | inet_csk(sk)->icsk_rto, | ||
1552 | TCP_RTO_MAX); | ||
1550 | 1553 | ||
1551 | NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); | 1554 | NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); |
1552 | } | 1555 | } |
@@ -1826,7 +1829,8 @@ int tcp_connect(struct sock *sk) | |||
1826 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); | 1829 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); |
1827 | 1830 | ||
1828 | /* Timer for repeating the SYN until an answer. */ | 1831 | /* Timer for repeating the SYN until an answer. */ |
1829 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); | 1832 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1833 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
1830 | return 0; | 1834 | return 0; |
1831 | } | 1835 | } |
1832 | 1836 | ||
@@ -1901,7 +1905,8 @@ void tcp_send_ack(struct sock *sk) | |||
1901 | if (buff == NULL) { | 1905 | if (buff == NULL) { |
1902 | inet_csk_schedule_ack(sk); | 1906 | inet_csk_schedule_ack(sk); |
1903 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; | 1907 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; |
1904 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); | 1908 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
1909 | TCP_DELACK_MAX, TCP_RTO_MAX); | ||
1905 | return; | 1910 | return; |
1906 | } | 1911 | } |
1907 | 1912 | ||
@@ -2033,7 +2038,8 @@ void tcp_send_probe0(struct sock *sk) | |||
2033 | icsk->icsk_backoff++; | 2038 | icsk->icsk_backoff++; |
2034 | tp->probes_out++; | 2039 | tp->probes_out++; |
2035 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 2040 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
2036 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); | 2041 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), |
2042 | TCP_RTO_MAX); | ||
2037 | } else { | 2043 | } else { |
2038 | /* If packet was not sent due to local congestion, | 2044 | /* If packet was not sent due to local congestion, |
2039 | * do not backoff and do not remember probes_out. | 2045 | * do not backoff and do not remember probes_out. |
@@ -2045,7 +2051,8 @@ void tcp_send_probe0(struct sock *sk) | |||
2045 | tp->probes_out=1; | 2051 | tp->probes_out=1; |
2046 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 2052 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
2047 | min(icsk->icsk_rto << icsk->icsk_backoff, | 2053 | min(icsk->icsk_rto << icsk->icsk_backoff, |
2048 | TCP_RESOURCE_PROBE_INTERVAL)); | 2054 | TCP_RESOURCE_PROBE_INTERVAL), |
2055 | TCP_RTO_MAX); | ||
2049 | } | 2056 | } |
2050 | } | 2057 | } |
2051 | 2058 | ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0b71380ee42f..c03930c48f42 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -36,55 +36,14 @@ static void tcp_write_timer(unsigned long); | |||
36 | static void tcp_delack_timer(unsigned long); | 36 | static void tcp_delack_timer(unsigned long); |
37 | static void tcp_keepalive_timer (unsigned long data); | 37 | static void tcp_keepalive_timer (unsigned long data); |
38 | 38 | ||
39 | #ifdef INET_CSK_DEBUG | ||
40 | const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; | ||
41 | EXPORT_SYMBOL(inet_csk_timer_bug_msg); | ||
42 | #endif | ||
43 | |||
44 | /* | ||
45 | * Using different timers for retransmit, delayed acks and probes | ||
46 | * We may wish use just one timer maintaining a list of expire jiffies | ||
47 | * to optimize. | ||
48 | */ | ||
49 | void inet_csk_init_xmit_timers(struct sock *sk, | ||
50 | void (*retransmit_handler)(unsigned long), | ||
51 | void (*delack_handler)(unsigned long), | ||
52 | void (*keepalive_handler)(unsigned long)) | ||
53 | { | ||
54 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
55 | |||
56 | init_timer(&icsk->icsk_retransmit_timer); | ||
57 | init_timer(&icsk->icsk_delack_timer); | ||
58 | init_timer(&sk->sk_timer); | ||
59 | |||
60 | icsk->icsk_retransmit_timer.function = retransmit_handler; | ||
61 | icsk->icsk_delack_timer.function = delack_handler; | ||
62 | sk->sk_timer.function = keepalive_handler; | ||
63 | |||
64 | icsk->icsk_retransmit_timer.data = | ||
65 | icsk->icsk_delack_timer.data = | ||
66 | sk->sk_timer.data = (unsigned long)sk; | ||
67 | |||
68 | icsk->icsk_pending = icsk->icsk_ack.pending = 0; | ||
69 | } | ||
70 | |||
71 | void inet_csk_clear_xmit_timers(struct sock *sk) | ||
72 | { | ||
73 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
74 | |||
75 | icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; | ||
76 | |||
77 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
78 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
79 | sk_stop_timer(sk, &sk->sk_timer); | ||
80 | } | ||
81 | |||
82 | void tcp_init_xmit_timers(struct sock *sk) | 39 | void tcp_init_xmit_timers(struct sock *sk) |
83 | { | 40 | { |
84 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, | 41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, |
85 | &tcp_keepalive_timer); | 42 | &tcp_keepalive_timer); |
86 | } | 43 | } |
87 | 44 | ||
45 | EXPORT_SYMBOL(tcp_init_xmit_timers); | ||
46 | |||
88 | static void tcp_write_err(struct sock *sk) | 47 | static void tcp_write_err(struct sock *sk) |
89 | { | 48 | { |
90 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; | 49 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; |
@@ -392,7 +351,8 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
392 | if (!icsk->icsk_retransmits) | 351 | if (!icsk->icsk_retransmits) |
393 | icsk->icsk_retransmits = 1; | 352 | icsk->icsk_retransmits = 1; |
394 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 353 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
395 | min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); | 354 | min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), |
355 | TCP_RTO_MAX); | ||
396 | goto out; | 356 | goto out; |
397 | } | 357 | } |
398 | 358 | ||
@@ -416,7 +376,7 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
416 | 376 | ||
417 | out_reset_timer: | 377 | out_reset_timer: |
418 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 378 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); |
419 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); | 379 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
420 | if (icsk->icsk_retransmits > sysctl_tcp_retries1) | 380 | if (icsk->icsk_retransmits > sysctl_tcp_retries1) |
421 | __sk_dst_reset(sk); | 381 | __sk_dst_reset(sk); |
422 | 382 | ||
@@ -553,16 +513,6 @@ static void tcp_synack_timer(struct sock *sk) | |||
553 | inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); | 513 | inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); |
554 | } | 514 | } |
555 | 515 | ||
556 | void inet_csk_delete_keepalive_timer(struct sock *sk) | ||
557 | { | ||
558 | sk_stop_timer(sk, &sk->sk_timer); | ||
559 | } | ||
560 | |||
561 | void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) | ||
562 | { | ||
563 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); | ||
564 | } | ||
565 | |||
566 | void tcp_set_keepalive(struct sock *sk, int val) | 516 | void tcp_set_keepalive(struct sock *sk, int val) |
567 | { | 517 | { |
568 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) | 518 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) |
@@ -653,8 +603,3 @@ out: | |||
653 | bh_unlock_sock(sk); | 603 | bh_unlock_sock(sk); |
654 | sock_put(sk); | 604 | sock_put(sk); |
655 | } | 605 | } |
656 | |||
657 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); | ||
658 | EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); | ||
659 | EXPORT_SYMBOL(tcp_init_xmit_timers); | ||
660 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | ||