-rw-r--r--  include/linux/ipv6.h                |   7
-rw-r--r--  include/net/inet_connection_sock.h  | 152
-rw-r--r--  include/net/tcp.h                    | 160
-rw-r--r--  net/ipv4/Makefile                    |   2
-rw-r--r--  net/ipv4/inet_connection_sock.c      | 401
-rw-r--r--  net/ipv4/tcp.c                       |  93
-rw-r--r--  net/ipv4/tcp_input.c                 |  10
-rw-r--r--  net/ipv4/tcp_ipv4.c                  | 210
-rw-r--r--  net/ipv4/tcp_output.c                |  19
-rw-r--r--  net/ipv4/tcp_timer.c                 |  65
10 files changed, 588 insertions(+), 531 deletions(-)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 88591913c94f..777339b68464 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -333,15 +333,10 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
333 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; 333 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
334} 334}
335 335
336static inline int inet_twsk_ipv6only(const struct sock *sk)
337{
338 return inet_twsk(sk)->tw_ipv6only;
339}
340
341static inline int inet_v6_ipv6only(const struct sock *sk) 336static inline int inet_v6_ipv6only(const struct sock *sk)
342{ 337{
343 return likely(sk->sk_state != TCP_TIME_WAIT) ? 338 return likely(sk->sk_state != TCP_TIME_WAIT) ?
344 ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); 339 ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only;
345} 340}
346#else 341#else
347#define __ipv6_only_sock(sk) 0 342#define __ipv6_only_sock(sk) 0
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ef609396e41b..97e002001c1a 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -16,9 +16,15 @@
16#define _INET_CONNECTION_SOCK_H 16#define _INET_CONNECTION_SOCK_H
17 17
18#include <linux/ip.h> 18#include <linux/ip.h>
19#include <linux/string.h>
19#include <linux/timer.h> 20#include <linux/timer.h>
20#include <net/request_sock.h> 21#include <net/request_sock.h>
21 22
23#define INET_CSK_DEBUG 1
24
25/* Cancel timers, when they are not required. */
26#undef INET_CSK_CLEAR_TIMERS
27
22struct inet_bind_bucket; 28struct inet_bind_bucket;
23struct inet_hashinfo; 29struct inet_hashinfo;
24 30
@@ -61,17 +67,107 @@ struct inet_connection_sock {
61 } icsk_ack; 67 } icsk_ack;
62}; 68};
63 69
70#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
71#define ICSK_TIME_DACK 2 /* Delayed ack timer */
72#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
73#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */
74
64static inline struct inet_connection_sock *inet_csk(const struct sock *sk) 75static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
65{ 76{
66 return (struct inet_connection_sock *)sk; 77 return (struct inet_connection_sock *)sk;
67} 78}
68 79
80enum inet_csk_ack_state_t {
81 ICSK_ACK_SCHED = 1,
82 ICSK_ACK_TIMER = 2,
83 ICSK_ACK_PUSHED = 4
84};
85
69extern void inet_csk_init_xmit_timers(struct sock *sk, 86extern void inet_csk_init_xmit_timers(struct sock *sk,
70 void (*retransmit_handler)(unsigned long), 87 void (*retransmit_handler)(unsigned long),
71 void (*delack_handler)(unsigned long), 88 void (*delack_handler)(unsigned long),
72 void (*keepalive_handler)(unsigned long)); 89 void (*keepalive_handler)(unsigned long));
73extern void inet_csk_clear_xmit_timers(struct sock *sk); 90extern void inet_csk_clear_xmit_timers(struct sock *sk);
74 91
92static inline void inet_csk_schedule_ack(struct sock *sk)
93{
94 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
95}
96
97static inline int inet_csk_ack_scheduled(const struct sock *sk)
98{
99 return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
100}
101
102static inline void inet_csk_delack_init(struct sock *sk)
103{
104 memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
105}
106
107extern void inet_csk_delete_keepalive_timer(struct sock *sk);
108extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
109
110#ifdef INET_CSK_DEBUG
111extern const char inet_csk_timer_bug_msg[];
112#endif
113
114static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
115{
116 struct inet_connection_sock *icsk = inet_csk(sk);
117
118 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
119 icsk->icsk_pending = 0;
120#ifdef INET_CSK_CLEAR_TIMERS
121 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
122#endif
123 } else if (what == ICSK_TIME_DACK) {
124 icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
125#ifdef INET_CSK_CLEAR_TIMERS
126 sk_stop_timer(sk, &icsk->icsk_delack_timer);
127#endif
128 }
129#ifdef INET_CSK_DEBUG
130 else {
131 pr_debug(inet_csk_timer_bug_msg);
132 }
133#endif
134}
135
136/*
137 * Reset the retransmission timer
138 */
139static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
140 unsigned long when,
141 const unsigned long max_when)
142{
143 struct inet_connection_sock *icsk = inet_csk(sk);
144
145 if (when > max_when) {
146#ifdef INET_CSK_DEBUG
147 pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
148 sk, what, when, current_text_addr());
149#endif
150 when = max_when;
151 }
152
153 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
154 icsk->icsk_pending = what;
155 icsk->icsk_timeout = jiffies + when;
156 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
157 } else if (what == ICSK_TIME_DACK) {
158 icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
159 icsk->icsk_ack.timeout = jiffies + when;
160 sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
161 }
162#ifdef INET_CSK_DEBUG
163 else {
164 pr_debug(inet_csk_timer_bug_msg);
165 }
166#endif
167}
168
169extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
170
75extern struct request_sock *inet_csk_search_req(const struct sock *sk, 171extern struct request_sock *inet_csk_search_req(const struct sock *sk,
76 struct request_sock ***prevp, 172 struct request_sock ***prevp,
77 const __u16 rport, 173 const __u16 rport,
@@ -83,4 +179,60 @@ extern int inet_csk_get_port(struct inet_hashinfo *hashinfo,
83extern struct dst_entry* inet_csk_route_req(struct sock *sk, 179extern struct dst_entry* inet_csk_route_req(struct sock *sk,
84 const struct request_sock *req); 180 const struct request_sock *req);
85 181
182static inline void inet_csk_reqsk_queue_add(struct sock *sk,
183 struct request_sock *req,
184 struct sock *child)
185{
186 reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
187}
188
189extern void inet_csk_reqsk_queue_hash_add(struct sock *sk,
190 struct request_sock *req,
191 const unsigned timeout);
192
193static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
194 struct request_sock *req)
195{
196 if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
197 inet_csk_delete_keepalive_timer(sk);
198}
199
200static inline void inet_csk_reqsk_queue_added(struct sock *sk,
201 const unsigned long timeout)
202{
203 if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
204 inet_csk_reset_keepalive_timer(sk, timeout);
205}
206
207static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
208{
209 return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
210}
211
212static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
213{
214 return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
215}
216
217static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
218{
219 return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
220}
221
222static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
223 struct request_sock *req,
224 struct request_sock **prev)
225{
226 reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
227}
228
229static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
230 struct request_sock *req,
231 struct request_sock **prev)
232{
233 inet_csk_reqsk_queue_unlink(sk, req, prev);
234 inet_csk_reqsk_queue_removed(sk, req);
235 reqsk_free(req);
236}
237
86#endif /* _INET_CONNECTION_SOCK_H */ 238#endif /* _INET_CONNECTION_SOCK_H */
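
A minimal usage sketch of the helpers declared above, for orientation only: the example_* wrappers are hypothetical and not part of this patch, and TCP_RTO_MAX appears simply because that is the ceiling the TCP call sites below pass as max_when.

#include <net/inet_connection_sock.h>
#include <net/tcp.h>	/* TCP_RTO_MAX, used here only as an example ceiling */

/* Arm the retransmit timer; callers now supply the clamp explicitly. */
static void example_arm_rto(struct sock *sk)
{
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}

/* Everything acknowledged: clear the pending retransmit/probe0 state. */
static void example_all_acked(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
}
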
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a943c79c88b0..dd9a5a288f88 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -19,18 +19,16 @@
19#define _TCP_H 19#define _TCP_H
20 20
21#define TCP_DEBUG 1 21#define TCP_DEBUG 1
22#define INET_CSK_DEBUG 1
23#define FASTRETRANS_DEBUG 1 22#define FASTRETRANS_DEBUG 1
24 23
25/* Cancel timers, when they are not required. */
26#undef INET_CSK_CLEAR_TIMERS
27
28#include <linux/config.h> 24#include <linux/config.h>
29#include <linux/list.h> 25#include <linux/list.h>
30#include <linux/tcp.h> 26#include <linux/tcp.h>
31#include <linux/slab.h> 27#include <linux/slab.h>
32#include <linux/cache.h> 28#include <linux/cache.h>
33#include <linux/percpu.h> 29#include <linux/percpu.h>
30
31#include <net/inet_connection_sock.h>
34#include <net/inet_hashtables.h> 32#include <net/inet_hashtables.h>
35#include <net/checksum.h> 33#include <net/checksum.h>
36#include <net/request_sock.h> 34#include <net/request_sock.h>
@@ -206,11 +204,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
206#define TCPOLEN_SACK_BASE_ALIGNED 4 204#define TCPOLEN_SACK_BASE_ALIGNED 4
207#define TCPOLEN_SACK_PERBLOCK 8 205#define TCPOLEN_SACK_PERBLOCK 8
208 206
209#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
210#define ICSK_TIME_DACK 2 /* Delayed ack timer */
211#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
212#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */
213
214/* Flags in tp->nonagle */ 207/* Flags in tp->nonagle */
215#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ 208#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
216#define TCP_NAGLE_CORK 2 /* Socket is corked */ 209#define TCP_NAGLE_CORK 2 /* Socket is corked */
@@ -257,12 +250,6 @@ extern atomic_t tcp_memory_allocated;
257extern atomic_t tcp_sockets_allocated; 250extern atomic_t tcp_sockets_allocated;
258extern int tcp_memory_pressure; 251extern int tcp_memory_pressure;
259 252
260#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
261#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
262#else
263#define AF_INET_FAMILY(fam) 1
264#endif
265
266/* 253/*
267 * Pointers to address related TCP functions 254 * Pointers to address related TCP functions
268 * (i.e. things that depend on the address family) 255 * (i.e. things that depend on the address family)
@@ -373,22 +360,6 @@ extern int tcp_rcv_established(struct sock *sk,
373 360
374extern void tcp_rcv_space_adjust(struct sock *sk); 361extern void tcp_rcv_space_adjust(struct sock *sk);
375 362
376enum inet_csk_ack_state_t {
377 ICSK_ACK_SCHED = 1,
378 ICSK_ACK_TIMER = 2,
379 ICSK_ACK_PUSHED = 4
380};
381
382static inline void inet_csk_schedule_ack(struct sock *sk)
383{
384 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
385}
386
387static inline int inet_csk_ack_scheduled(const struct sock *sk)
388{
389 return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
390}
391
392static inline void tcp_dec_quickack_mode(struct sock *sk, 363static inline void tcp_dec_quickack_mode(struct sock *sk,
393 const unsigned int pkts) 364 const unsigned int pkts)
394{ 365{
@@ -406,11 +377,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
406 377
407extern void tcp_enter_quickack_mode(struct sock *sk); 378extern void tcp_enter_quickack_mode(struct sock *sk);
408 379
409static inline void inet_csk_delack_init(struct sock *sk)
410{
411 memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
412}
413
414static inline void tcp_clear_options(struct tcp_options_received *rx_opt) 380static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
415{ 381{
416 rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; 382 rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
@@ -442,7 +408,6 @@ extern void tcp_update_metrics(struct sock *sk);
442 408
443extern void tcp_close(struct sock *sk, 409extern void tcp_close(struct sock *sk,
444 long timeout); 410 long timeout);
445extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err);
446extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); 411extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
447 412
448extern int tcp_getsockopt(struct sock *sk, int level, 413extern int tcp_getsockopt(struct sock *sk, int level,
@@ -541,15 +506,9 @@ static inline void tcp_clear_xmit_timers(struct sock *sk)
541 inet_csk_clear_xmit_timers(sk); 506 inet_csk_clear_xmit_timers(sk);
542} 507}
543 508
544extern void inet_csk_delete_keepalive_timer(struct sock *sk);
545extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
546extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); 509extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
547extern unsigned int tcp_current_mss(struct sock *sk, int large); 510extern unsigned int tcp_current_mss(struct sock *sk, int large);
548 511
549#ifdef INET_CSK_DEBUG
550extern const char inet_csk_timer_bug_msg[];
551#endif
552
553/* tcp_diag.c */ 512/* tcp_diag.c */
554extern void tcp_get_info(struct sock *, struct tcp_info *); 513extern void tcp_get_info(struct sock *, struct tcp_info *);
555 514
@@ -559,60 +518,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
559extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, 518extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
560 sk_read_actor_t recv_actor); 519 sk_read_actor_t recv_actor);
561 520
562static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
563{
564 struct inet_connection_sock *icsk = inet_csk(sk);
565
566 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
567 icsk->icsk_pending = 0;
568#ifdef INET_CSK_CLEAR_TIMERS
569 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
570#endif
571 } else if (what == ICSK_TIME_DACK) {
572 icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
573#ifdef INET_CSK_CLEAR_TIMERS
574 sk_stop_timer(sk, &icsk->icsk_delack_timer);
575#endif
576 }
577#ifdef INET_CSK_DEBUG
578 else {
579 pr_debug(inet_csk_timer_bug_msg);
580 }
581#endif
582}
583
584/*
585 * Reset the retransmission timer
586 */
587static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
588 unsigned long when)
589{
590 struct inet_connection_sock *icsk = inet_csk(sk);
591
592 if (when > TCP_RTO_MAX) {
593#ifdef INET_CSK_DEBUG
594 pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
595 sk, what, when, current_text_addr());
596#endif
597 when = TCP_RTO_MAX;
598 }
599
600 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
601 icsk->icsk_pending = what;
602 icsk->icsk_timeout = jiffies + when;
603 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
604 } else if (what == ICSK_TIME_DACK) {
605 icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
606 icsk->icsk_ack.timeout = jiffies + when;
607 sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
608 }
609#ifdef INET_CSK_DEBUG
610 else {
611 pr_debug(inet_csk_timer_bug_msg);
612 }
613#endif
614}
615
616/* Initialize RCV_MSS value. 521/* Initialize RCV_MSS value.
617 * RCV_MSS is an our guess about MSS used by the peer. 522 * RCV_MSS is an our guess about MSS used by the peer.
618 * We haven't any direct information about the MSS. 523 * We haven't any direct information about the MSS.
@@ -765,7 +670,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,
765 670
766 tp->packets_out += tcp_skb_pcount(skb); 671 tp->packets_out += tcp_skb_pcount(skb);
767 if (!orig) 672 if (!orig)
768 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); 673 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
674 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
769} 675}
770 676
771static inline void tcp_packets_out_dec(struct tcp_sock *tp, 677static inline void tcp_packets_out_dec(struct tcp_sock *tp,
@@ -934,7 +840,8 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t
934{ 840{
935 const struct inet_connection_sock *icsk = inet_csk(sk); 841 const struct inet_connection_sock *icsk = inet_csk(sk);
936 if (!tp->packets_out && !icsk->icsk_pending) 842 if (!tp->packets_out && !icsk->icsk_pending)
937 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); 843 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
844 icsk->icsk_rto, TCP_RTO_MAX);
938} 845}
939 846
940static __inline__ void tcp_push_pending_frames(struct sock *sk, 847static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1017,7 +924,8 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1017 wake_up_interruptible(sk->sk_sleep); 924 wake_up_interruptible(sk->sk_sleep);
1018 if (!inet_csk_ack_scheduled(sk)) 925 if (!inet_csk_ack_scheduled(sk))
1019 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 926 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1020 (3 * TCP_RTO_MIN) / 4); 927 (3 * TCP_RTO_MIN) / 4,
928 TCP_RTO_MAX);
1021 } 929 }
1022 return 1; 930 return 1;
1023 } 931 }
@@ -1181,58 +1089,6 @@ static inline int tcp_full_space(const struct sock *sk)
1181 return tcp_win_from_space(sk->sk_rcvbuf); 1089 return tcp_win_from_space(sk->sk_rcvbuf);
1182} 1090}
1183 1091
1184static inline void inet_csk_reqsk_queue_add(struct sock *sk,
1185 struct request_sock *req,
1186 struct sock *child)
1187{
1188 reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
1189}
1190
1191static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
1192 struct request_sock *req)
1193{
1194 if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
1195 inet_csk_delete_keepalive_timer(sk);
1196}
1197
1198static inline void inet_csk_reqsk_queue_added(struct sock *sk,
1199 const unsigned long timeout)
1200{
1201 if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
1202 inet_csk_reset_keepalive_timer(sk, timeout);
1203}
1204
1205static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
1206{
1207 return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
1208}
1209
1210static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
1211{
1212 return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
1213}
1214
1215static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
1216{
1217 return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
1218}
1219
1220static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
1221 struct request_sock *req,
1222 struct request_sock **prev)
1223{
1224 reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
1225}
1226
1227static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
1228 struct request_sock *req,
1229 struct request_sock **prev)
1230{
1231 inet_csk_reqsk_queue_unlink(sk, req, prev);
1232 inet_csk_reqsk_queue_removed(sk, req);
1233 reqsk_free(req);
1234}
1235
1236static __inline__ void tcp_openreq_init(struct request_sock *req, 1092static __inline__ void tcp_openreq_init(struct request_sock *req,
1237 struct tcp_options_received *rx_opt, 1093 struct tcp_options_received *rx_opt,
1238 struct sk_buff *skb) 1094 struct sk_buff *skb)
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 6650d18e400f..ea0e1d87dc7e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -5,7 +5,7 @@
5obj-y := route.o inetpeer.o protocol.o \ 5obj-y := route.o inetpeer.o protocol.o \
6 ip_input.o ip_fragment.o ip_forward.o ip_options.o \ 6 ip_input.o ip_fragment.o ip_forward.o ip_options.o \
7 ip_output.o ip_sockglue.o inet_hashtables.o \ 7 ip_output.o ip_sockglue.o inet_hashtables.o \
8 inet_timewait_sock.o \ 8 inet_timewait_sock.o inet_connection_sock.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
new file mode 100644
index 000000000000..2712400a8bb8
--- /dev/null
+++ b/net/ipv4/inet_connection_sock.c
@@ -0,0 +1,401 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Support for INET connection oriented protocols.
7 *
8 * Authors: See the TCP sources
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or(at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/jhash.h>
19
20#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h>
22#include <net/inet_timewait_sock.h>
23#include <net/ip.h>
24#include <net/route.h>
25#include <net/tcp_states.h>
26
27#ifdef INET_CSK_DEBUG
28const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
29EXPORT_SYMBOL(inet_csk_timer_bug_msg);
30#endif
31
32/*
33 * This array holds the first and last local port number.
34 * For high-usage systems, use sysctl to change this to
35 * 32768-61000
36 */
37int sysctl_local_port_range[2] = { 1024, 4999 };
38
39static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
40{
41 const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
42 struct sock *sk2;
43 struct hlist_node *node;
44 int reuse = sk->sk_reuse;
45
46 sk_for_each_bound(sk2, node, &tb->owners) {
47 if (sk != sk2 &&
48 !inet_v6_ipv6only(sk2) &&
49 (!sk->sk_bound_dev_if ||
50 !sk2->sk_bound_dev_if ||
51 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
52 if (!reuse || !sk2->sk_reuse ||
53 sk2->sk_state == TCP_LISTEN) {
54 const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
55 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
56 sk2_rcv_saddr == sk_rcv_saddr)
57 break;
58 }
59 }
60 }
61 return node != NULL;
62}
63
64/* Obtain a reference to a local port for the given sock,
65 * if snum is zero it means select any available local port.
66 */
67int inet_csk_get_port(struct inet_hashinfo *hashinfo,
68 struct sock *sk, unsigned short snum)
69{
70 struct inet_bind_hashbucket *head;
71 struct hlist_node *node;
72 struct inet_bind_bucket *tb;
73 int ret;
74
75 local_bh_disable();
76 if (!snum) {
77 int low = sysctl_local_port_range[0];
78 int high = sysctl_local_port_range[1];
79 int remaining = (high - low) + 1;
80 int rover;
81
82 spin_lock(&hashinfo->portalloc_lock);
83 if (hashinfo->port_rover < low)
84 rover = low;
85 else
86 rover = hashinfo->port_rover;
87 do {
88 rover++;
89 if (rover > high)
90 rover = low;
91 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
92 spin_lock(&head->lock);
93 inet_bind_bucket_for_each(tb, node, &head->chain)
94 if (tb->port == rover)
95 goto next;
96 break;
97 next:
98 spin_unlock(&head->lock);
99 } while (--remaining > 0);
100 hashinfo->port_rover = rover;
101 spin_unlock(&hashinfo->portalloc_lock);
102
103 /* Exhausted local port range during search? It is not
104 * possible for us to be holding one of the bind hash
105 * locks if this test triggers, because if 'remaining'
106 * drops to zero, we broke out of the do/while loop at
107 * the top level, not from the 'break;' statement.
108 */
109 ret = 1;
110 if (remaining <= 0)
111 goto fail;
112
113 /* OK, here is the one we will use. HEAD is
114 * non-NULL and we hold it's mutex.
115 */
116 snum = rover;
117 } else {
118 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
119 spin_lock(&head->lock);
120 inet_bind_bucket_for_each(tb, node, &head->chain)
121 if (tb->port == snum)
122 goto tb_found;
123 }
124 tb = NULL;
125 goto tb_not_found;
126tb_found:
127 if (!hlist_empty(&tb->owners)) {
128 if (sk->sk_reuse > 1)
129 goto success;
130 if (tb->fastreuse > 0 &&
131 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
132 goto success;
133 } else {
134 ret = 1;
135 if (inet_csk_bind_conflict(sk, tb))
136 goto fail_unlock;
137 }
138 }
139tb_not_found:
140 ret = 1;
141 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
142 goto fail_unlock;
143 if (hlist_empty(&tb->owners)) {
144 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
145 tb->fastreuse = 1;
146 else
147 tb->fastreuse = 0;
148 } else if (tb->fastreuse &&
149 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
150 tb->fastreuse = 0;
151success:
152 if (!inet_csk(sk)->icsk_bind_hash)
153 inet_bind_hash(sk, tb, snum);
154 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
155 ret = 0;
156
157fail_unlock:
158 spin_unlock(&head->lock);
159fail:
160 local_bh_enable();
161 return ret;
162}
163
164EXPORT_SYMBOL_GPL(inet_csk_get_port);
165
166/*
167 * Wait for an incoming connection, avoid race conditions. This must be called
168 * with the socket locked.
169 */
170static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
171{
172 struct inet_connection_sock *icsk = inet_csk(sk);
173 DEFINE_WAIT(wait);
174 int err;
175
176 /*
177 * True wake-one mechanism for incoming connections: only
178 * one process gets woken up, not the 'whole herd'.
179 * Since we do not 'race & poll' for established sockets
180 * anymore, the common case will execute the loop only once.
181 *
182 * Subtle issue: "add_wait_queue_exclusive()" will be added
183 * after any current non-exclusive waiters, and we know that
184 * it will always _stay_ after any new non-exclusive waiters
185 * because all non-exclusive waiters are added at the
186 * beginning of the wait-queue. As such, it's ok to "drop"
187 * our exclusiveness temporarily when we get woken up without
188 * having to remove and re-insert us on the wait queue.
189 */
190 for (;;) {
191 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
192 TASK_INTERRUPTIBLE);
193 release_sock(sk);
194 if (reqsk_queue_empty(&icsk->icsk_accept_queue))
195 timeo = schedule_timeout(timeo);
196 lock_sock(sk);
197 err = 0;
198 if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
199 break;
200 err = -EINVAL;
201 if (sk->sk_state != TCP_LISTEN)
202 break;
203 err = sock_intr_errno(timeo);
204 if (signal_pending(current))
205 break;
206 err = -EAGAIN;
207 if (!timeo)
208 break;
209 }
210 finish_wait(sk->sk_sleep, &wait);
211 return err;
212}
213
214/*
215 * This will accept the next outstanding connection.
216 */
217struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
218{
219 struct inet_connection_sock *icsk = inet_csk(sk);
220 struct sock *newsk;
221 int error;
222
223 lock_sock(sk);
224
225 /* We need to make sure that this socket is listening,
226 * and that it has something pending.
227 */
228 error = -EINVAL;
229 if (sk->sk_state != TCP_LISTEN)
230 goto out_err;
231
232 /* Find already established connection */
233 if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
234 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
235
236 /* If this is a non blocking socket don't sleep */
237 error = -EAGAIN;
238 if (!timeo)
239 goto out_err;
240
241 error = inet_csk_wait_for_connect(sk, timeo);
242 if (error)
243 goto out_err;
244 }
245
246 newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
247 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
248out:
249 release_sock(sk);
250 return newsk;
251out_err:
252 newsk = NULL;
253 *err = error;
254 goto out;
255}
256
257EXPORT_SYMBOL(inet_csk_accept);
258
259/*
260 * Using different timers for retransmit, delayed acks and probes
261 * We may wish use just one timer maintaining a list of expire jiffies
262 * to optimize.
263 */
264void inet_csk_init_xmit_timers(struct sock *sk,
265 void (*retransmit_handler)(unsigned long),
266 void (*delack_handler)(unsigned long),
267 void (*keepalive_handler)(unsigned long))
268{
269 struct inet_connection_sock *icsk = inet_csk(sk);
270
271 init_timer(&icsk->icsk_retransmit_timer);
272 init_timer(&icsk->icsk_delack_timer);
273 init_timer(&sk->sk_timer);
274
275 icsk->icsk_retransmit_timer.function = retransmit_handler;
276 icsk->icsk_delack_timer.function = delack_handler;
277 sk->sk_timer.function = keepalive_handler;
278
279 icsk->icsk_retransmit_timer.data =
280 icsk->icsk_delack_timer.data =
281 sk->sk_timer.data = (unsigned long)sk;
282
283 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
284}
285
286EXPORT_SYMBOL(inet_csk_init_xmit_timers);
287
288void inet_csk_clear_xmit_timers(struct sock *sk)
289{
290 struct inet_connection_sock *icsk = inet_csk(sk);
291
292 icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
293
294 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
295 sk_stop_timer(sk, &icsk->icsk_delack_timer);
296 sk_stop_timer(sk, &sk->sk_timer);
297}
298
299EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
300
301void inet_csk_delete_keepalive_timer(struct sock *sk)
302{
303 sk_stop_timer(sk, &sk->sk_timer);
304}
305
306EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
307
308void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
309{
310 sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
311}
312
313EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
314
315struct dst_entry* inet_csk_route_req(struct sock *sk,
316 const struct request_sock *req)
317{
318 struct rtable *rt;
319 const struct inet_request_sock *ireq = inet_rsk(req);
320 struct ip_options *opt = inet_rsk(req)->opt;
321 struct flowi fl = { .oif = sk->sk_bound_dev_if,
322 .nl_u = { .ip4_u =
323 { .daddr = ((opt && opt->srr) ?
324 opt->faddr :
325 ireq->rmt_addr),
326 .saddr = ireq->loc_addr,
327 .tos = RT_CONN_FLAGS(sk) } },
328 .proto = sk->sk_protocol,
329 .uli_u = { .ports =
330 { .sport = inet_sk(sk)->sport,
331 .dport = ireq->rmt_port } } };
332
333 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
334 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
335 return NULL;
336 }
337 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
338 ip_rt_put(rt);
339 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
340 return NULL;
341 }
342 return &rt->u.dst;
343}
344
345EXPORT_SYMBOL_GPL(inet_csk_route_req);
346
347static inline u32 inet_synq_hash(const u32 raddr, const u16 rport,
348 const u32 rnd, const u16 synq_hsize)
349{
350 return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1);
351}
352
353#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
354#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
355#else
356#define AF_INET_FAMILY(fam) 1
357#endif
358
359struct request_sock *inet_csk_search_req(const struct sock *sk,
360 struct request_sock ***prevp,
361 const __u16 rport, const __u32 raddr,
362 const __u32 laddr)
363{
364 const struct inet_connection_sock *icsk = inet_csk(sk);
365 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
366 struct request_sock *req, **prev;
367
368 for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
369 lopt->nr_table_entries)];
370 (req = *prev) != NULL;
371 prev = &req->dl_next) {
372 const struct inet_request_sock *ireq = inet_rsk(req);
373
374 if (ireq->rmt_port == rport &&
375 ireq->rmt_addr == raddr &&
376 ireq->loc_addr == laddr &&
377 AF_INET_FAMILY(req->rsk_ops->family)) {
378 BUG_TRAP(!req->sk);
379 *prevp = prev;
380 break;
381 }
382 }
383
384 return req;
385}
386
387EXPORT_SYMBOL_GPL(inet_csk_search_req);
388
389void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
390 const unsigned timeout)
391{
392 struct inet_connection_sock *icsk = inet_csk(sk);
393 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
394 const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
395 lopt->hash_rnd, lopt->nr_table_entries);
396
397 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
398 inet_csk_reqsk_queue_added(sk, timeout);
399}
400
401EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
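
For orientation, a sketch of the listen-side path these request-queue helpers centralize, mirroring what tcp_v4_conn_request does further down; the example_conn_request name and the drop-on-overflow policy are illustrative only, not taken from the patch.

#include <linux/errno.h>
#include <net/inet_connection_sock.h>
#include <net/tcp.h>	/* TCP_TIMEOUT_INIT, the timeout TCP passes below */

static int example_conn_request(struct sock *sk, struct request_sock *req)
{
	/* Too many embryonic connections already queued: drop this SYN. */
	if (inet_csk_reqsk_queue_is_full(sk))
		return -ENOBUFS;

	/* Hash the request into the SYN table and do the queue accounting;
	 * inet_csk_reqsk_queue_added arms sk_timer if the queue was empty. */
	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;
}
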
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8177b86570db..581016a6a93f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1805,98 +1805,6 @@ int tcp_disconnect(struct sock *sk, int flags)
1805} 1805}
1806 1806
1807/* 1807/*
1808 * Wait for an incoming connection, avoid race
1809 * conditions. This must be called with the socket locked.
1810 */
1811static int wait_for_connect(struct sock *sk, long timeo)
1812{
1813 struct inet_connection_sock *icsk = inet_csk(sk);
1814 DEFINE_WAIT(wait);
1815 int err;
1816
1817 /*
1818 * True wake-one mechanism for incoming connections: only
1819 * one process gets woken up, not the 'whole herd'.
1820 * Since we do not 'race & poll' for established sockets
1821 * anymore, the common case will execute the loop only once.
1822 *
1823 * Subtle issue: "add_wait_queue_exclusive()" will be added
1824 * after any current non-exclusive waiters, and we know that
1825 * it will always _stay_ after any new non-exclusive waiters
1826 * because all non-exclusive waiters are added at the
1827 * beginning of the wait-queue. As such, it's ok to "drop"
1828 * our exclusiveness temporarily when we get woken up without
1829 * having to remove and re-insert us on the wait queue.
1830 */
1831 for (;;) {
1832 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
1833 TASK_INTERRUPTIBLE);
1834 release_sock(sk);
1835 if (reqsk_queue_empty(&icsk->icsk_accept_queue))
1836 timeo = schedule_timeout(timeo);
1837 lock_sock(sk);
1838 err = 0;
1839 if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
1840 break;
1841 err = -EINVAL;
1842 if (sk->sk_state != TCP_LISTEN)
1843 break;
1844 err = sock_intr_errno(timeo);
1845 if (signal_pending(current))
1846 break;
1847 err = -EAGAIN;
1848 if (!timeo)
1849 break;
1850 }
1851 finish_wait(sk->sk_sleep, &wait);
1852 return err;
1853}
1854
1855/*
1856 * This will accept the next outstanding connection.
1857 */
1858
1859struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
1860{
1861 struct inet_connection_sock *icsk = inet_csk(sk);
1862 struct sock *newsk;
1863 int error;
1864
1865 lock_sock(sk);
1866
1867 /* We need to make sure that this socket is listening,
1868 * and that it has something pending.
1869 */
1870 error = -EINVAL;
1871 if (sk->sk_state != TCP_LISTEN)
1872 goto out_err;
1873
1874 /* Find already established connection */
1875 if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
1876 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1877
1878 /* If this is a non blocking socket don't sleep */
1879 error = -EAGAIN;
1880 if (!timeo)
1881 goto out_err;
1882
1883 error = wait_for_connect(sk, timeo);
1884 if (error)
1885 goto out_err;
1886 }
1887
1888 newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
1889 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
1890out:
1891 release_sock(sk);
1892 return newsk;
1893out_err:
1894 newsk = NULL;
1895 *err = error;
1896 goto out;
1897}
1898
1899/*
1900 * Socket option code for TCP. 1808 * Socket option code for TCP.
1901 */ 1809 */
1902int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, 1810int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
@@ -2344,7 +2252,6 @@ void __init tcp_init(void)
2344 tcp_register_congestion_control(&tcp_reno); 2252 tcp_register_congestion_control(&tcp_reno);
2345} 2253}
2346 2254
2347EXPORT_SYMBOL(inet_csk_accept);
2348EXPORT_SYMBOL(tcp_close); 2255EXPORT_SYMBOL(tcp_close);
2349EXPORT_SYMBOL(tcp_destroy_sock); 2256EXPORT_SYMBOL(tcp_destroy_sock);
2350EXPORT_SYMBOL(tcp_disconnect); 2257EXPORT_SYMBOL(tcp_disconnect);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8a8c5c2d90cb..b35badf53aa5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1278,7 +1278,7 @@ static int tcp_check_sack_reneging(struct sock *sk)
1278 inet_csk(sk)->icsk_retransmits++; 1278 inet_csk(sk)->icsk_retransmits++;
1279 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); 1279 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
1280 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1280 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1281 inet_csk(sk)->icsk_rto); 1281 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1282 return 1; 1282 return 1;
1283 } 1283 }
1284 return 0; 1284 return 0;
@@ -1961,7 +1961,7 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
1961 if (!tp->packets_out) { 1961 if (!tp->packets_out) {
1962 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 1962 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
1963 } else { 1963 } else {
1964 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); 1964 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1965 } 1965 }
1966} 1966}
1967 1967
@@ -2147,7 +2147,8 @@ static void tcp_ack_probe(struct sock *sk)
2147 */ 2147 */
2148 } else { 2148 } else {
2149 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 2149 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2150 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); 2150 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2151 TCP_RTO_MAX);
2151 } 2152 }
2152} 2153}
2153 2154
@@ -3968,7 +3969,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
3968 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; 3969 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3969 tcp_incr_quickack(sk); 3970 tcp_incr_quickack(sk);
3970 tcp_enter_quickack_mode(sk); 3971 tcp_enter_quickack_mode(sk);
3971 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); 3972 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3973 TCP_DELACK_MAX, TCP_RTO_MAX);
3972 3974
3973discard: 3975discard:
3974 __kfree_skb(skb); 3976 __kfree_skb(skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2cd41265d17f..2f605b9e6b67 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -97,138 +97,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
97 .port_rover = 1024 - 1, 97 .port_rover = 1024 - 1,
98}; 98};
99 99
100/*
101 * This array holds the first and last local port number.
102 * For high-usage systems, use sysctl to change this to
103 * 32768-61000
104 */
105int sysctl_local_port_range[2] = { 1024, 4999 };
106
107static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
108{
109 const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
110 struct sock *sk2;
111 struct hlist_node *node;
112 int reuse = sk->sk_reuse;
113
114 sk_for_each_bound(sk2, node, &tb->owners) {
115 if (sk != sk2 &&
116 !inet_v6_ipv6only(sk2) &&
117 (!sk->sk_bound_dev_if ||
118 !sk2->sk_bound_dev_if ||
119 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
120 if (!reuse || !sk2->sk_reuse ||
121 sk2->sk_state == TCP_LISTEN) {
122 const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
123 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
124 sk2_rcv_saddr == sk_rcv_saddr)
125 break;
126 }
127 }
128 }
129 return node != NULL;
130}
131
132/* Obtain a reference to a local port for the given sock,
133 * if snum is zero it means select any available local port.
134 */
135int inet_csk_get_port(struct inet_hashinfo *hashinfo,
136 struct sock *sk, unsigned short snum)
137{
138 struct inet_bind_hashbucket *head;
139 struct hlist_node *node;
140 struct inet_bind_bucket *tb;
141 int ret;
142
143 local_bh_disable();
144 if (!snum) {
145 int low = sysctl_local_port_range[0];
146 int high = sysctl_local_port_range[1];
147 int remaining = (high - low) + 1;
148 int rover;
149
150 spin_lock(&hashinfo->portalloc_lock);
151 if (hashinfo->port_rover < low)
152 rover = low;
153 else
154 rover = hashinfo->port_rover;
155 do {
156 rover++;
157 if (rover > high)
158 rover = low;
159 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
160 spin_lock(&head->lock);
161 inet_bind_bucket_for_each(tb, node, &head->chain)
162 if (tb->port == rover)
163 goto next;
164 break;
165 next:
166 spin_unlock(&head->lock);
167 } while (--remaining > 0);
168 hashinfo->port_rover = rover;
169 spin_unlock(&hashinfo->portalloc_lock);
170
171 /* Exhausted local port range during search? It is not
172 * possible for us to be holding one of the bind hash
173 * locks if this test triggers, because if 'remaining'
174 * drops to zero, we broke out of the do/while loop at
175 * the top level, not from the 'break;' statement.
176 */
177 ret = 1;
178 if (unlikely(remaining <= 0))
179 goto fail;
180
181 /* OK, here is the one we will use. HEAD is
182 * non-NULL and we hold it's mutex.
183 */
184 snum = rover;
185 } else {
186 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
187 spin_lock(&head->lock);
188 inet_bind_bucket_for_each(tb, node, &head->chain)
189 if (tb->port == snum)
190 goto tb_found;
191 }
192 tb = NULL;
193 goto tb_not_found;
194tb_found:
195 if (!hlist_empty(&tb->owners)) {
196 if (sk->sk_reuse > 1)
197 goto success;
198 if (tb->fastreuse > 0 &&
199 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
200 goto success;
201 } else {
202 ret = 1;
203 if (inet_csk_bind_conflict(sk, tb))
204 goto fail_unlock;
205 }
206 }
207tb_not_found:
208 ret = 1;
209 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
210 goto fail_unlock;
211 if (hlist_empty(&tb->owners)) {
212 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
213 tb->fastreuse = 1;
214 else
215 tb->fastreuse = 0;
216 } else if (tb->fastreuse &&
217 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
218 tb->fastreuse = 0;
219success:
220 if (!inet_csk(sk)->icsk_bind_hash)
221 inet_bind_hash(sk, tb, snum);
222 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
223 ret = 0;
224
225fail_unlock:
226 spin_unlock(&head->lock);
227fail:
228 local_bh_enable();
229 return ret;
230}
231
232static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 100static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
233{ 101{
234 return inet_csk_get_port(&tcp_hashinfo, sk, snum); 102 return inet_csk_get_port(&tcp_hashinfo, sk, snum);
@@ -568,52 +436,6 @@ static inline int inet_iif(const struct sk_buff *skb)
568 return ((struct rtable *)skb->dst)->rt_iif; 436 return ((struct rtable *)skb->dst)->rt_iif;
569} 437}
570 438
571static inline u32 inet_synq_hash(const u32 raddr, const u16 rport,
572 const u32 rnd, const u16 synq_hsize)
573{
574 return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1);
575}
576
577struct request_sock *inet_csk_search_req(const struct sock *sk,
578 struct request_sock ***prevp,
579 const __u16 rport, const __u32 raddr,
580 const __u32 laddr)
581{
582 const struct inet_connection_sock *icsk = inet_csk(sk);
583 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
584 struct request_sock *req, **prev;
585
586 for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
587 lopt->nr_table_entries)];
588 (req = *prev) != NULL;
589 prev = &req->dl_next) {
590 const struct inet_request_sock *ireq = inet_rsk(req);
591
592 if (ireq->rmt_port == rport &&
593 ireq->rmt_addr == raddr &&
594 ireq->loc_addr == laddr &&
595 AF_INET_FAMILY(req->rsk_ops->family)) {
596 BUG_TRAP(!req->sk);
597 *prevp = prev;
598 break;
599 }
600 }
601
602 return req;
603}
604
605static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
606{
607 struct inet_connection_sock *icsk = inet_csk(sk);
608 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
609 const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
610 lopt->hash_rnd, lopt->nr_table_entries);
611
612 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
613 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
614}
615
616
617/* 439/*
618 * This routine does path mtu discovery as defined in RFC1191. 440 * This routine does path mtu discovery as defined in RFC1191.
619 */ 441 */
@@ -963,36 +785,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
963 req->ts_recent); 785 req->ts_recent);
964} 786}
965 787
966struct dst_entry* inet_csk_route_req(struct sock *sk,
967 const struct request_sock *req)
968{
969 struct rtable *rt;
970 const struct inet_request_sock *ireq = inet_rsk(req);
971 struct ip_options *opt = inet_rsk(req)->opt;
972 struct flowi fl = { .oif = sk->sk_bound_dev_if,
973 .nl_u = { .ip4_u =
974 { .daddr = ((opt && opt->srr) ?
975 opt->faddr :
976 ireq->rmt_addr),
977 .saddr = ireq->loc_addr,
978 .tos = RT_CONN_FLAGS(sk) } },
979 .proto = sk->sk_protocol,
980 .uli_u = { .ports =
981 { .sport = inet_sk(sk)->sport,
982 .dport = ireq->rmt_port } } };
983
984 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
985 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
986 return NULL;
987 }
988 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
989 ip_rt_put(rt);
990 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
991 return NULL;
992 }
993 return &rt->u.dst;
994}
995
996/* 788/*
997 * Send a SYN-ACK after having received an ACK. 789 * Send a SYN-ACK after having received an ACK.
998 * This still operates on a request_sock only, not on a big 790 * This still operates on a request_sock only, not on a big
@@ -1222,7 +1014,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1222 if (want_cookie) { 1014 if (want_cookie) {
1223 reqsk_free(req); 1015 reqsk_free(req);
1224 } else { 1016 } else {
1225 tcp_v4_synq_add(sk, req); 1017 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1226 } 1018 }
1227 return 0; 1019 return 0;
1228 1020
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6f0a7e30ceac..f458eacb5ef2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1493,7 +1493,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1493 if (skb == 1493 if (skb ==
1494 skb_peek(&sk->sk_write_queue)) 1494 skb_peek(&sk->sk_write_queue))
1495 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1495 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1496 inet_csk(sk)->icsk_rto); 1496 inet_csk(sk)->icsk_rto,
1497 TCP_RTO_MAX);
1497 } 1498 }
1498 1499
1499 packet_cnt -= tcp_skb_pcount(skb); 1500 packet_cnt -= tcp_skb_pcount(skb);
@@ -1546,7 +1547,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1546 break; 1547 break;
1547 1548
1548 if (skb == skb_peek(&sk->sk_write_queue)) 1549 if (skb == skb_peek(&sk->sk_write_queue))
1549 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); 1550 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1551 inet_csk(sk)->icsk_rto,
1552 TCP_RTO_MAX);
1550 1553
1551 NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); 1554 NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
1552 } 1555 }
@@ -1826,7 +1829,8 @@ int tcp_connect(struct sock *sk)
1826 TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); 1829 TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
1827 1830
1828 /* Timer for repeating the SYN until an answer. */ 1831 /* Timer for repeating the SYN until an answer. */
1829 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); 1832 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1833 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1830 return 0; 1834 return 0;
1831} 1835}
1832 1836
@@ -1901,7 +1905,8 @@ void tcp_send_ack(struct sock *sk)
1901 if (buff == NULL) { 1905 if (buff == NULL) {
1902 inet_csk_schedule_ack(sk); 1906 inet_csk_schedule_ack(sk);
1903 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; 1907 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
1904 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); 1908 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1909 TCP_DELACK_MAX, TCP_RTO_MAX);
1905 return; 1910 return;
1906 } 1911 }
1907 1912
@@ -2033,7 +2038,8 @@ void tcp_send_probe0(struct sock *sk)
2033 icsk->icsk_backoff++; 2038 icsk->icsk_backoff++;
2034 tp->probes_out++; 2039 tp->probes_out++;
2035 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 2040 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2036 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); 2041 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2042 TCP_RTO_MAX);
2037 } else { 2043 } else {
2038 /* If packet was not sent due to local congestion, 2044 /* If packet was not sent due to local congestion,
2039 * do not backoff and do not remember probes_out. 2045 * do not backoff and do not remember probes_out.
@@ -2045,7 +2051,8 @@ void tcp_send_probe0(struct sock *sk)
2045 tp->probes_out=1; 2051 tp->probes_out=1;
2046 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 2052 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2047 min(icsk->icsk_rto << icsk->icsk_backoff, 2053 min(icsk->icsk_rto << icsk->icsk_backoff,
2048 TCP_RESOURCE_PROBE_INTERVAL)); 2054 TCP_RESOURCE_PROBE_INTERVAL),
2055 TCP_RTO_MAX);
2049 } 2056 }
2050} 2057}
2051 2058
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0b71380ee42f..c03930c48f42 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -36,55 +36,14 @@ static void tcp_write_timer(unsigned long);
36static void tcp_delack_timer(unsigned long); 36static void tcp_delack_timer(unsigned long);
37static void tcp_keepalive_timer (unsigned long data); 37static void tcp_keepalive_timer (unsigned long data);
38 38
39#ifdef INET_CSK_DEBUG
40const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
41EXPORT_SYMBOL(inet_csk_timer_bug_msg);
42#endif
43
44/*
45 * Using different timers for retransmit, delayed acks and probes
46 * We may wish use just one timer maintaining a list of expire jiffies
47 * to optimize.
48 */
49void inet_csk_init_xmit_timers(struct sock *sk,
50 void (*retransmit_handler)(unsigned long),
51 void (*delack_handler)(unsigned long),
52 void (*keepalive_handler)(unsigned long))
53{
54 struct inet_connection_sock *icsk = inet_csk(sk);
55
56 init_timer(&icsk->icsk_retransmit_timer);
57 init_timer(&icsk->icsk_delack_timer);
58 init_timer(&sk->sk_timer);
59
60 icsk->icsk_retransmit_timer.function = retransmit_handler;
61 icsk->icsk_delack_timer.function = delack_handler;
62 sk->sk_timer.function = keepalive_handler;
63
64 icsk->icsk_retransmit_timer.data =
65 icsk->icsk_delack_timer.data =
66 sk->sk_timer.data = (unsigned long)sk;
67
68 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
69}
70
71void inet_csk_clear_xmit_timers(struct sock *sk)
72{
73 struct inet_connection_sock *icsk = inet_csk(sk);
74
75 icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
76
77 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
78 sk_stop_timer(sk, &icsk->icsk_delack_timer);
79 sk_stop_timer(sk, &sk->sk_timer);
80}
81
82void tcp_init_xmit_timers(struct sock *sk) 39void tcp_init_xmit_timers(struct sock *sk)
83{ 40{
84 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, 41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
85 &tcp_keepalive_timer); 42 &tcp_keepalive_timer);
86} 43}
87 44
45EXPORT_SYMBOL(tcp_init_xmit_timers);
46
88static void tcp_write_err(struct sock *sk) 47static void tcp_write_err(struct sock *sk)
89{ 48{
90 sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; 49 sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -392,7 +351,8 @@ static void tcp_retransmit_timer(struct sock *sk)
392 if (!icsk->icsk_retransmits) 351 if (!icsk->icsk_retransmits)
393 icsk->icsk_retransmits = 1; 352 icsk->icsk_retransmits = 1;
394 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 353 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
395 min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); 354 min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
355 TCP_RTO_MAX);
396 goto out; 356 goto out;
397 } 357 }
398 358
@@ -416,7 +376,7 @@ static void tcp_retransmit_timer(struct sock *sk)
416 376
417out_reset_timer: 377out_reset_timer:
418 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 378 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
419 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); 379 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
420 if (icsk->icsk_retransmits > sysctl_tcp_retries1) 380 if (icsk->icsk_retransmits > sysctl_tcp_retries1)
421 __sk_dst_reset(sk); 381 __sk_dst_reset(sk);
422 382
@@ -553,16 +513,6 @@ static void tcp_synack_timer(struct sock *sk)
553 inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); 513 inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
554} 514}
555 515
556void inet_csk_delete_keepalive_timer(struct sock *sk)
557{
558 sk_stop_timer(sk, &sk->sk_timer);
559}
560
561void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
562{
563 sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
564}
565
566void tcp_set_keepalive(struct sock *sk, int val) 516void tcp_set_keepalive(struct sock *sk, int val)
567{ 517{
568 if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) 518 if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
@@ -653,8 +603,3 @@ out:
653 bh_unlock_sock(sk); 603 bh_unlock_sock(sk);
654 sock_put(sk); 604 sock_put(sk);
655} 605}
656
657EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
658EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
659EXPORT_SYMBOL(tcp_init_xmit_timers);
660EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);