aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/tcp.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r--include/net/tcp.h723
1 files changed, 100 insertions, 623 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5010f0c5a56e..d6bcf1317a6a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -21,360 +21,29 @@
21#define TCP_DEBUG 1 21#define TCP_DEBUG 1
22#define FASTRETRANS_DEBUG 1 22#define FASTRETRANS_DEBUG 1
23 23
24/* Cancel timers, when they are not required. */
25#undef TCP_CLEAR_TIMERS
26
27#include <linux/config.h> 24#include <linux/config.h>
28#include <linux/list.h> 25#include <linux/list.h>
29#include <linux/tcp.h> 26#include <linux/tcp.h>
30#include <linux/slab.h> 27#include <linux/slab.h>
31#include <linux/cache.h> 28#include <linux/cache.h>
32#include <linux/percpu.h> 29#include <linux/percpu.h>
30
31#include <net/inet_connection_sock.h>
32#include <net/inet_timewait_sock.h>
33#include <net/inet_hashtables.h>
33#include <net/checksum.h> 34#include <net/checksum.h>
34#include <net/request_sock.h> 35#include <net/request_sock.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/snmp.h> 37#include <net/snmp.h>
37#include <net/ip.h> 38#include <net/ip.h>
38#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 39#include <net/tcp_states.h>
39#include <linux/ipv6.h>
40#endif
41#include <linux/seq_file.h>
42
43/* This is for all connections with a full identity, no wildcards.
44 * New scheme, half the table is for TIME_WAIT, the other half is
45 * for the rest. I'll experiment with dynamic table growth later.
46 */
47struct tcp_ehash_bucket {
48 rwlock_t lock;
49 struct hlist_head chain;
50} __attribute__((__aligned__(8)));
51
52/* This is for listening sockets, thus all sockets which possess wildcards. */
53#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
54
55/* There are a few simple rules, which allow for local port reuse by
56 * an application. In essence:
57 *
58 * 1) Sockets bound to different interfaces may share a local port.
59 * Failing that, goto test 2.
60 * 2) If all sockets have sk->sk_reuse set, and none of them are in
61 * TCP_LISTEN state, the port may be shared.
62 * Failing that, goto test 3.
63 * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
64 * address, and none of them are the same, the port may be
65 * shared.
66 * Failing this, the port cannot be shared.
67 *
68 * The interesting point, is test #2. This is what an FTP server does
69 * all day. To optimize this case we use a specific flag bit defined
70 * below. As we add sockets to a bind bucket list, we perform a
71 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
72 * As long as all sockets added to a bind bucket pass this test,
73 * the flag bit will be set.
74 * The resulting situation is that tcp_v[46]_verify_bind() can just check
75 * for this flag bit, if it is set and the socket trying to bind has
76 * sk->sk_reuse set, we don't even have to walk the owners list at all,
77 * we return that it is ok to bind this socket to the requested local port.
78 *
79 * Sounds like a lot of work, but it is worth it. In a more naive
80 * implementation (ie. current FreeBSD etc.) the entire list of ports
81 * must be walked for each data port opened by an ftp server. Needless
82 * to say, this does not scale at all. With a couple thousand FTP
83 * users logged onto your box, isn't it nice to know that new data
84 * ports are created in O(1) time? I thought so. ;-) -DaveM
85 */
86struct tcp_bind_bucket {
87 unsigned short port;
88 signed short fastreuse;
89 struct hlist_node node;
90 struct hlist_head owners;
91};
92
93#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
94
95struct tcp_bind_hashbucket {
96 spinlock_t lock;
97 struct hlist_head chain;
98};
99
100static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head)
101{
102 return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);
103}
104
105static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head)
106{
107 return hlist_empty(&head->chain) ? NULL : __tb_head(head);
108}
109
110extern struct tcp_hashinfo {
111 /* This is for sockets with full identity only. Sockets here will
112 * always be without wildcards and will have the following invariant:
113 *
114 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
115 *
116 * First half of the table is for sockets not in TIME_WAIT, second half
117 * is for TIME_WAIT sockets only.
118 */
119 struct tcp_ehash_bucket *__tcp_ehash;
120
121 /* Ok, let's try this, I give up, we do need a local binding
122 * TCP hash as well as the others for fast bind/connect.
123 */
124 struct tcp_bind_hashbucket *__tcp_bhash;
125 40
126 int __tcp_bhash_size; 41#include <linux/seq_file.h>
127 int __tcp_ehash_size;
128
129 /* All sockets in TCP_LISTEN state will be in here. This is the only
130 * table where wildcard'd TCP sockets can exist. Hash function here
131 * is just local port number.
132 */
133 struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE];
134
135 /* All the above members are written once at bootup and
136 * never written again _or_ are predominantly read-access.
137 *
138 * Now align to a new cache line as all the following members
139 * are often dirty.
140 */
141 rwlock_t __tcp_lhash_lock ____cacheline_aligned;
142 atomic_t __tcp_lhash_users;
143 wait_queue_head_t __tcp_lhash_wait;
144 spinlock_t __tcp_portalloc_lock;
145} tcp_hashinfo;
146
147#define tcp_ehash (tcp_hashinfo.__tcp_ehash)
148#define tcp_bhash (tcp_hashinfo.__tcp_bhash)
149#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
150#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
151#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
152#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
153#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
154#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
155#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
156
157extern kmem_cache_t *tcp_bucket_cachep;
158extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
159 unsigned short snum);
160extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
161extern void tcp_bucket_unlock(struct sock *sk);
162extern int tcp_port_rover;
163
164/* These are AF independent. */
165static __inline__ int tcp_bhashfn(__u16 lport)
166{
167 return (lport & (tcp_bhash_size - 1));
168}
169
170extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
171 unsigned short snum);
172
173#if (BITS_PER_LONG == 64)
174#define TCP_ADDRCMP_ALIGN_BYTES 8
175#else
176#define TCP_ADDRCMP_ALIGN_BYTES 4
177#endif
178
179/* This is a TIME_WAIT bucket. It works around the memory consumption
180 * problems of sockets in such a state on heavily loaded servers, but
181 * without violating the protocol specification.
182 */
183struct tcp_tw_bucket {
184 /*
185 * Now struct sock also uses sock_common, so please just
186 * don't add nothing before this first member (__tw_common) --acme
187 */
188 struct sock_common __tw_common;
189#define tw_family __tw_common.skc_family
190#define tw_state __tw_common.skc_state
191#define tw_reuse __tw_common.skc_reuse
192#define tw_bound_dev_if __tw_common.skc_bound_dev_if
193#define tw_node __tw_common.skc_node
194#define tw_bind_node __tw_common.skc_bind_node
195#define tw_refcnt __tw_common.skc_refcnt
196 volatile unsigned char tw_substate;
197 unsigned char tw_rcv_wscale;
198 __u16 tw_sport;
199 /* Socket demultiplex comparisons on incoming packets. */
200 /* these five are in inet_sock */
201 __u32 tw_daddr
202 __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
203 __u32 tw_rcv_saddr;
204 __u16 tw_dport;
205 __u16 tw_num;
206 /* And these are ours. */
207 int tw_hashent;
208 int tw_timeout;
209 __u32 tw_rcv_nxt;
210 __u32 tw_snd_nxt;
211 __u32 tw_rcv_wnd;
212 __u32 tw_ts_recent;
213 long tw_ts_recent_stamp;
214 unsigned long tw_ttd;
215 struct tcp_bind_bucket *tw_tb;
216 struct hlist_node tw_death_node;
217#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
218 struct in6_addr tw_v6_daddr;
219 struct in6_addr tw_v6_rcv_saddr;
220 int tw_v6_ipv6only;
221#endif
222};
223
224static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
225 struct hlist_head *list)
226{
227 hlist_add_head(&tw->tw_node, list);
228}
229
230static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw,
231 struct hlist_head *list)
232{
233 hlist_add_head(&tw->tw_bind_node, list);
234}
235
236static inline int tw_dead_hashed(struct tcp_tw_bucket *tw)
237{
238 return tw->tw_death_node.pprev != NULL;
239}
240
241static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw)
242{
243 tw->tw_death_node.pprev = NULL;
244}
245
246static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw)
247{
248 __hlist_del(&tw->tw_death_node);
249 tw_dead_node_init(tw);
250}
251
252static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
253{
254 if (tw_dead_hashed(tw)) {
255 __tw_del_dead_node(tw);
256 return 1;
257 }
258 return 0;
259}
260
261#define tw_for_each(tw, node, head) \
262 hlist_for_each_entry(tw, node, head, tw_node)
263
264#define tw_for_each_inmate(tw, node, jail) \
265 hlist_for_each_entry(tw, node, jail, tw_death_node)
266
267#define tw_for_each_inmate_safe(tw, node, safe, jail) \
268 hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
269
270#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
271
272static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
273{
274 return likely(sk->sk_state != TCP_TIME_WAIT) ?
275 inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
276}
277
278#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
279static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
280{
281 return likely(sk->sk_state != TCP_TIME_WAIT) ?
282 &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
283}
284
285static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
286{
287 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
288}
289
290#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only)
291
292static inline int tcp_v6_ipv6only(const struct sock *sk)
293{
294 return likely(sk->sk_state != TCP_TIME_WAIT) ?
295 ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);
296}
297#else
298# define __tcp_v6_rcv_saddr(__sk) NULL
299# define tcp_v6_rcv_saddr(__sk) NULL
300# define tcptw_sk_ipv6only(__sk) 0
301# define tcp_v6_ipv6only(__sk) 0
302#endif
303 42
304extern kmem_cache_t *tcp_timewait_cachep; 43extern struct inet_hashinfo tcp_hashinfo;
305
306static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
307{
308 if (atomic_dec_and_test(&tw->tw_refcnt)) {
309#ifdef INET_REFCNT_DEBUG
310 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
311#endif
312 kmem_cache_free(tcp_timewait_cachep, tw);
313 }
314}
315 44
316extern atomic_t tcp_orphan_count; 45extern atomic_t tcp_orphan_count;
317extern int tcp_tw_count;
318extern void tcp_time_wait(struct sock *sk, int state, int timeo); 46extern void tcp_time_wait(struct sock *sk, int state, int timeo);
319extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
320
321
322/* Socket demux engine toys. */
323#ifdef __BIG_ENDIAN
324#define TCP_COMBINED_PORTS(__sport, __dport) \
325 (((__u32)(__sport)<<16) | (__u32)(__dport))
326#else /* __LITTLE_ENDIAN */
327#define TCP_COMBINED_PORTS(__sport, __dport) \
328 (((__u32)(__dport)<<16) | (__u32)(__sport))
329#endif
330
331#if (BITS_PER_LONG == 64)
332#ifdef __BIG_ENDIAN
333#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
334 __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
335#else /* __LITTLE_ENDIAN */
336#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
337 __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
338#endif /* __BIG_ENDIAN */
339#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
340 (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
341 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
342 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
343#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
344 (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \
345 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
346 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
347#else /* 32-bit arch */
348#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
349#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
350 ((inet_sk(__sk)->daddr == (__saddr)) && \
351 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
352 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
353 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
354#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
355 ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \
356 (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \
357 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
358 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
359#endif /* 64-bit arch */
360
361#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
362 (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
363 ((__sk)->sk_family == AF_INET6) && \
364 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
365 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
366 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
367
368/* These can have wildcards, don't try too hard. */
369static __inline__ int tcp_lhashfn(unsigned short num)
370{
371 return num & (TCP_LHTABLE_SIZE - 1);
372}
373
374static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
375{
376 return tcp_lhashfn(inet_sk(sk)->num);
377}
378 47
379#define MAX_TCP_HEADER (128 + MAX_HEADER) 48#define MAX_TCP_HEADER (128 + MAX_HEADER)
380 49
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
478 * timestamps. It must be less than 147 * timestamps. It must be less than
479 * minimal timewait lifetime. 148 * minimal timewait lifetime.
480 */ 149 */
481
482#define TCP_TW_RECYCLE_SLOTS_LOG 5
483#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
484
485/* If time > 4sec, it is "slow" path, no recycling is required,
486 so that we select tick to get range about 4 seconds.
487 */
488
489#if HZ <= 16 || HZ > 4096
490# error Unsupported: HZ <= 16 or HZ > 4096
491#elif HZ <= 32
492# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
493#elif HZ <= 64
494# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
495#elif HZ <= 128
496# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
497#elif HZ <= 256
498# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
499#elif HZ <= 512
500# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
501#elif HZ <= 1024
502# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
503#elif HZ <= 2048
504# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
505#else
506# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
507#endif
508/* 150/*
509 * TCP option 151 * TCP option
510 */ 152 */
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
534#define TCPOLEN_SACK_BASE_ALIGNED 4 176#define TCPOLEN_SACK_BASE_ALIGNED 4
535#define TCPOLEN_SACK_PERBLOCK 8 177#define TCPOLEN_SACK_PERBLOCK 8
536 178
537#define TCP_TIME_RETRANS 1 /* Retransmit timer */
538#define TCP_TIME_DACK 2 /* Delayed ack timer */
539#define TCP_TIME_PROBE0 3 /* Zero window probe timer */
540#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
541
542/* Flags in tp->nonagle */ 179/* Flags in tp->nonagle */
543#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ 180#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
544#define TCP_NAGLE_CORK 2 /* Socket is corked */ 181#define TCP_NAGLE_CORK 2 /* Socket is corked */
545#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ 182#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */
546 183
184extern struct inet_timewait_death_row tcp_death_row;
185
547/* sysctl variables for tcp */ 186/* sysctl variables for tcp */
548extern int sysctl_tcp_timestamps; 187extern int sysctl_tcp_timestamps;
549extern int sysctl_tcp_window_scaling; 188extern int sysctl_tcp_window_scaling;
550extern int sysctl_tcp_sack; 189extern int sysctl_tcp_sack;
551extern int sysctl_tcp_fin_timeout; 190extern int sysctl_tcp_fin_timeout;
552extern int sysctl_tcp_tw_recycle;
553extern int sysctl_tcp_keepalive_time; 191extern int sysctl_tcp_keepalive_time;
554extern int sysctl_tcp_keepalive_probes; 192extern int sysctl_tcp_keepalive_probes;
555extern int sysctl_tcp_keepalive_intvl; 193extern int sysctl_tcp_keepalive_intvl;
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg;
564extern int sysctl_tcp_rfc1337; 202extern int sysctl_tcp_rfc1337;
565extern int sysctl_tcp_abort_on_overflow; 203extern int sysctl_tcp_abort_on_overflow;
566extern int sysctl_tcp_max_orphans; 204extern int sysctl_tcp_max_orphans;
567extern int sysctl_tcp_max_tw_buckets;
568extern int sysctl_tcp_fack; 205extern int sysctl_tcp_fack;
569extern int sysctl_tcp_reordering; 206extern int sysctl_tcp_reordering;
570extern int sysctl_tcp_ecn; 207extern int sysctl_tcp_ecn;
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated;
585extern atomic_t tcp_sockets_allocated; 222extern atomic_t tcp_sockets_allocated;
586extern int tcp_memory_pressure; 223extern int tcp_memory_pressure;
587 224
588#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
589#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
590#else
591#define TCP_INET_FAMILY(fam) 1
592#endif
593
594/* 225/*
595 * Pointers to address related TCP functions 226 * Pointers to address related TCP functions
596 * (i.e. things that depend on the address family) 227 * (i.e. things that depend on the address family)
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
671#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) 302#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
672#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) 303#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
673 304
674extern void tcp_put_port(struct sock *sk);
675extern void tcp_inherit_port(struct sock *sk, struct sock *child);
676
677extern void tcp_v4_err(struct sk_buff *skb, u32); 305extern void tcp_v4_err(struct sk_buff *skb, u32);
678 306
679extern void tcp_shutdown (struct sock *sk, int how); 307extern void tcp_shutdown (struct sock *sk, int how);
@@ -682,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
682 310
683extern int tcp_v4_remember_stamp(struct sock *sk); 311extern int tcp_v4_remember_stamp(struct sock *sk);
684 312
685extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); 313extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
686 314
687extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, 315extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
688 struct msghdr *msg, size_t size); 316 struct msghdr *msg, size_t size);
@@ -704,42 +332,22 @@ extern int tcp_rcv_established(struct sock *sk,
704 332
705extern void tcp_rcv_space_adjust(struct sock *sk); 333extern void tcp_rcv_space_adjust(struct sock *sk);
706 334
707enum tcp_ack_state_t 335static inline void tcp_dec_quickack_mode(struct sock *sk,
708{ 336 const unsigned int pkts)
709 TCP_ACK_SCHED = 1,
710 TCP_ACK_TIMER = 2,
711 TCP_ACK_PUSHED= 4
712};
713
714static inline void tcp_schedule_ack(struct tcp_sock *tp)
715{ 337{
716 tp->ack.pending |= TCP_ACK_SCHED; 338 struct inet_connection_sock *icsk = inet_csk(sk);
717}
718
719static inline int tcp_ack_scheduled(struct tcp_sock *tp)
720{
721 return tp->ack.pending&TCP_ACK_SCHED;
722}
723
724static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
725{
726 if (tp->ack.quick) {
727 if (pkts >= tp->ack.quick) {
728 tp->ack.quick = 0;
729 339
340 if (icsk->icsk_ack.quick) {
341 if (pkts >= icsk->icsk_ack.quick) {
342 icsk->icsk_ack.quick = 0;
730 /* Leaving quickack mode we deflate ATO. */ 343 /* Leaving quickack mode we deflate ATO. */
731 tp->ack.ato = TCP_ATO_MIN; 344 icsk->icsk_ack.ato = TCP_ATO_MIN;
732 } else 345 } else
733 tp->ack.quick -= pkts; 346 icsk->icsk_ack.quick -= pkts;
734 } 347 }
735} 348}
736 349
737extern void tcp_enter_quickack_mode(struct tcp_sock *tp); 350extern void tcp_enter_quickack_mode(struct sock *sk);
738
739static __inline__ void tcp_delack_init(struct tcp_sock *tp)
740{
741 memset(&tp->ack, 0, sizeof(tp->ack));
742}
743 351
744static inline void tcp_clear_options(struct tcp_options_received *rx_opt) 352static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
745{ 353{
@@ -755,10 +363,9 @@ enum tcp_tw_status
755}; 363};
756 364
757 365
758extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, 366extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
759 struct sk_buff *skb, 367 struct sk_buff *skb,
760 struct tcphdr *th, 368 const struct tcphdr *th);
761 unsigned len);
762 369
763extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, 370extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
764 struct request_sock *req, 371 struct request_sock *req,
@@ -773,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk);
773 380
774extern void tcp_close(struct sock *sk, 381extern void tcp_close(struct sock *sk,
775 long timeout); 382 long timeout);
776extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
777extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); 383extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
778 384
779extern int tcp_getsockopt(struct sock *sk, int level, 385extern int tcp_getsockopt(struct sock *sk, int level,
@@ -789,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
789 size_t len, int nonblock, 395 size_t len, int nonblock,
790 int flags, int *addr_len); 396 int flags, int *addr_len);
791 397
792extern int tcp_listen_start(struct sock *sk);
793
794extern void tcp_parse_options(struct sk_buff *skb, 398extern void tcp_parse_options(struct sk_buff *skb,
795 struct tcp_options_received *opt_rx, 399 struct tcp_options_received *opt_rx,
796 int estab); 400 int estab);
@@ -799,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb,
799 * TCP v4 functions exported for the inet6 API 403 * TCP v4 functions exported for the inet6 API
800 */ 404 */
801 405
802extern int tcp_v4_rebuild_header(struct sock *sk);
803
804extern int tcp_v4_build_header(struct sock *sk,
805 struct sk_buff *skb);
806
807extern void tcp_v4_send_check(struct sock *sk, 406extern void tcp_v4_send_check(struct sock *sk,
808 struct tcphdr *th, int len, 407 struct tcphdr *th, int len,
809 struct sk_buff *skb); 408 struct sk_buff *skb);
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
872 471
873/* tcp_timer.c */ 472/* tcp_timer.c */
874extern void tcp_init_xmit_timers(struct sock *); 473extern void tcp_init_xmit_timers(struct sock *);
875extern void tcp_clear_xmit_timers(struct sock *); 474static inline void tcp_clear_xmit_timers(struct sock *sk)
475{
476 inet_csk_clear_xmit_timers(sk);
477}
876 478
877extern void tcp_delete_keepalive_timer(struct sock *);
878extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
879extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); 479extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
880extern unsigned int tcp_current_mss(struct sock *sk, int large); 480extern unsigned int tcp_current_mss(struct sock *sk, int large);
881 481
882#ifdef TCP_DEBUG 482/* tcp.c */
883extern const char tcp_timer_bug_msg[];
884#endif
885
886/* tcp_diag.c */
887extern void tcp_get_info(struct sock *, struct tcp_info *); 483extern void tcp_get_info(struct sock *, struct tcp_info *);
888 484
889/* Read 'sendfile()'-style from a TCP socket */ 485/* Read 'sendfile()'-style from a TCP socket */
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
892extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, 488extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
893 sk_read_actor_t recv_actor); 489 sk_read_actor_t recv_actor);
894 490
895static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
896{
897 struct tcp_sock *tp = tcp_sk(sk);
898
899 switch (what) {
900 case TCP_TIME_RETRANS:
901 case TCP_TIME_PROBE0:
902 tp->pending = 0;
903
904#ifdef TCP_CLEAR_TIMERS
905 sk_stop_timer(sk, &tp->retransmit_timer);
906#endif
907 break;
908 case TCP_TIME_DACK:
909 tp->ack.blocked = 0;
910 tp->ack.pending = 0;
911
912#ifdef TCP_CLEAR_TIMERS
913 sk_stop_timer(sk, &tp->delack_timer);
914#endif
915 break;
916 default:
917#ifdef TCP_DEBUG
918 printk(tcp_timer_bug_msg);
919#endif
920 return;
921 };
922
923}
924
925/*
926 * Reset the retransmission timer
927 */
928static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
929{
930 struct tcp_sock *tp = tcp_sk(sk);
931
932 if (when > TCP_RTO_MAX) {
933#ifdef TCP_DEBUG
934 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
935#endif
936 when = TCP_RTO_MAX;
937 }
938
939 switch (what) {
940 case TCP_TIME_RETRANS:
941 case TCP_TIME_PROBE0:
942 tp->pending = what;
943 tp->timeout = jiffies+when;
944 sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
945 break;
946
947 case TCP_TIME_DACK:
948 tp->ack.pending |= TCP_ACK_TIMER;
949 tp->ack.timeout = jiffies+when;
950 sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
951 break;
952
953 default:
954#ifdef TCP_DEBUG
955 printk(tcp_timer_bug_msg);
956#endif
957 return;
958 };
959}
960
961/* Initialize RCV_MSS value. 491/* Initialize RCV_MSS value.
962 * RCV_MSS is an our guess about MSS used by the peer. 492 * RCV_MSS is an our guess about MSS used by the peer.
963 * We haven't any direct information about the MSS. 493 * We haven't any direct information about the MSS.
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk)
975 hint = min(hint, TCP_MIN_RCVMSS); 505 hint = min(hint, TCP_MIN_RCVMSS);
976 hint = max(hint, TCP_MIN_MSS); 506 hint = max(hint, TCP_MIN_MSS);
977 507
978 tp->ack.rcv_mss = hint; 508 inet_csk(sk)->icsk_ack.rcv_mss = hint;
979} 509}
980 510
981static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) 511static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,
1110 640
1111 tp->packets_out += tcp_skb_pcount(skb); 641 tp->packets_out += tcp_skb_pcount(skb);
1112 if (!orig) 642 if (!orig)
1113 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); 643 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
644 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1114} 645}
1115 646
1116static inline void tcp_packets_out_dec(struct tcp_sock *tp, 647static inline void tcp_packets_out_dec(struct tcp_sock *tp,
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops {
1138 struct list_head list; 669 struct list_head list;
1139 670
1140 /* initialize private data (optional) */ 671 /* initialize private data (optional) */
1141 void (*init)(struct tcp_sock *tp); 672 void (*init)(struct sock *sk);
1142 /* cleanup private data (optional) */ 673 /* cleanup private data (optional) */
1143 void (*release)(struct tcp_sock *tp); 674 void (*release)(struct sock *sk);
1144 675
1145 /* return slow start threshold (required) */ 676 /* return slow start threshold (required) */
1146 u32 (*ssthresh)(struct tcp_sock *tp); 677 u32 (*ssthresh)(struct sock *sk);
1147 /* lower bound for congestion window (optional) */ 678 /* lower bound for congestion window (optional) */
1148 u32 (*min_cwnd)(struct tcp_sock *tp); 679 u32 (*min_cwnd)(struct sock *sk);
1149 /* do new cwnd calculation (required) */ 680 /* do new cwnd calculation (required) */
1150 void (*cong_avoid)(struct tcp_sock *tp, u32 ack, 681 void (*cong_avoid)(struct sock *sk, u32 ack,
1151 u32 rtt, u32 in_flight, int good_ack); 682 u32 rtt, u32 in_flight, int good_ack);
1152 /* round trip time sample per acked packet (optional) */ 683 /* round trip time sample per acked packet (optional) */
1153 void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); 684 void (*rtt_sample)(struct sock *sk, u32 usrtt);
1154 /* call before changing ca_state (optional) */ 685 /* call before changing ca_state (optional) */
1155 void (*set_state)(struct tcp_sock *tp, u8 new_state); 686 void (*set_state)(struct sock *sk, u8 new_state);
1156 /* call when cwnd event occurs (optional) */ 687 /* call when cwnd event occurs (optional) */
1157 void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); 688 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
1158 /* new value of cwnd after loss (optional) */ 689 /* new value of cwnd after loss (optional) */
1159 u32 (*undo_cwnd)(struct tcp_sock *tp); 690 u32 (*undo_cwnd)(struct sock *sk);
1160 /* hook for packet ack accounting (optional) */ 691 /* hook for packet ack accounting (optional) */
1161 void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); 692 void (*pkts_acked)(struct sock *sk, u32 num_acked);
1162 /* get info for tcp_diag (optional) */ 693 /* get info for inet_diag (optional) */
1163 void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); 694 void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
1164 695
1165 char name[TCP_CA_NAME_MAX]; 696 char name[TCP_CA_NAME_MAX];
1166 struct module *owner; 697 struct module *owner;
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops {
1169extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); 700extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
1170extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); 701extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
1171 702
1172extern void tcp_init_congestion_control(struct tcp_sock *tp); 703extern void tcp_init_congestion_control(struct sock *sk);
1173extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); 704extern void tcp_cleanup_congestion_control(struct sock *sk);
1174extern int tcp_set_default_congestion_control(const char *name); 705extern int tcp_set_default_congestion_control(const char *name);
1175extern void tcp_get_default_congestion_control(char *name); 706extern void tcp_get_default_congestion_control(char *name);
1176extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); 707extern int tcp_set_congestion_control(struct sock *sk, const char *name);
1177 708
1178extern struct tcp_congestion_ops tcp_init_congestion_ops; 709extern struct tcp_congestion_ops tcp_init_congestion_ops;
1179extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); 710extern u32 tcp_reno_ssthresh(struct sock *sk);
1180extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, 711extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
1181 u32 rtt, u32 in_flight, int flag); 712 u32 rtt, u32 in_flight, int flag);
1182extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); 713extern u32 tcp_reno_min_cwnd(struct sock *sk);
1183extern struct tcp_congestion_ops tcp_reno; 714extern struct tcp_congestion_ops tcp_reno;
1184 715
1185static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) 716static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
1186{ 717{
1187 if (tp->ca_ops->set_state) 718 struct inet_connection_sock *icsk = inet_csk(sk);
1188 tp->ca_ops->set_state(tp, ca_state); 719
1189 tp->ca_state = ca_state; 720 if (icsk->icsk_ca_ops->set_state)
721 icsk->icsk_ca_ops->set_state(sk, ca_state);
722 icsk->icsk_ca_state = ca_state;
1190} 723}
1191 724
1192static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) 725static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
1193{ 726{
1194 if (tp->ca_ops->cwnd_event) 727 const struct inet_connection_sock *icsk = inet_csk(sk);
1195 tp->ca_ops->cwnd_event(tp, event); 728
729 if (icsk->icsk_ca_ops->cwnd_event)
730 icsk->icsk_ca_ops->cwnd_event(sk, event);
1196} 731}
1197 732
1198/* This determines how many packets are "in the network" to the best 733/* This determines how many packets are "in the network" to the best
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
1218 * The exception is rate halving phase, when cwnd is decreasing towards 753 * The exception is rate halving phase, when cwnd is decreasing towards
1219 * ssthresh. 754 * ssthresh.
1220 */ 755 */
1221static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) 756static inline __u32 tcp_current_ssthresh(const struct sock *sk)
1222{ 757{
1223 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) 758 const struct tcp_sock *tp = tcp_sk(sk);
759 if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
1224 return tp->snd_ssthresh; 760 return tp->snd_ssthresh;
1225 else 761 else
1226 return max(tp->snd_ssthresh, 762 return max(tp->snd_ssthresh,
@@ -1237,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
1237} 773}
1238 774
1239/* Set slow start threshold and cwnd not falling to slow start */ 775/* Set slow start threshold and cwnd not falling to slow start */
1240static inline void __tcp_enter_cwr(struct tcp_sock *tp) 776static inline void __tcp_enter_cwr(struct sock *sk)
1241{ 777{
778 const struct inet_connection_sock *icsk = inet_csk(sk);
779 struct tcp_sock *tp = tcp_sk(sk);
780
1242 tp->undo_marker = 0; 781 tp->undo_marker = 0;
1243 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 782 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1244 tp->snd_cwnd = min(tp->snd_cwnd, 783 tp->snd_cwnd = min(tp->snd_cwnd,
1245 tcp_packets_in_flight(tp) + 1U); 784 tcp_packets_in_flight(tp) + 1U);
1246 tp->snd_cwnd_cnt = 0; 785 tp->snd_cwnd_cnt = 0;
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
1249 TCP_ECN_queue_cwr(tp); 788 TCP_ECN_queue_cwr(tp);
1250} 789}
1251 790
1252static inline void tcp_enter_cwr(struct tcp_sock *tp) 791static inline void tcp_enter_cwr(struct sock *sk)
1253{ 792{
793 struct tcp_sock *tp = tcp_sk(sk);
794
1254 tp->prior_ssthresh = 0; 795 tp->prior_ssthresh = 0;
1255 if (tp->ca_state < TCP_CA_CWR) { 796 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
1256 __tcp_enter_cwr(tp); 797 __tcp_enter_cwr(sk);
1257 tcp_set_ca_state(tp, TCP_CA_CWR); 798 tcp_set_ca_state(sk, TCP_CA_CWR);
1258 } 799 }
1259} 800}
1260 801
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
1277 818
1278static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) 819static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
1279{ 820{
1280 if (!tp->packets_out && !tp->pending) 821 const struct inet_connection_sock *icsk = inet_csk(sk);
1281 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); 822 if (!tp->packets_out && !icsk->icsk_pending)
823 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
824 icsk->icsk_rto, TCP_RTO_MAX);
1282} 825}
1283 826
1284static __inline__ void tcp_push_pending_frames(struct sock *sk, 827static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
1297 tp->snd_wl1 = seq; 840 tp->snd_wl1 = seq;
1298} 841}
1299 842
1300extern void tcp_destroy_sock(struct sock *sk);
1301
1302
1303/* 843/*
1304 * Calculate(/check) TCP checksum 844 * Calculate(/check) TCP checksum
1305 */ 845 */
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1359 tp->ucopy.memory = 0; 899 tp->ucopy.memory = 0;
1360 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 900 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1361 wake_up_interruptible(sk->sk_sleep); 901 wake_up_interruptible(sk->sk_sleep);
1362 if (!tcp_ack_scheduled(tp)) 902 if (!inet_csk_ack_scheduled(sk))
1363 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); 903 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
904 (3 * TCP_RTO_MIN) / 4,
905 TCP_RTO_MAX);
1364 } 906 }
1365 return 1; 907 return 1;
1366 } 908 }
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
1393 TCP_INC_STATS(TCP_MIB_ESTABRESETS); 935 TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1394 936
1395 sk->sk_prot->unhash(sk); 937 sk->sk_prot->unhash(sk);
1396 if (tcp_sk(sk)->bind_hash && 938 if (inet_csk(sk)->icsk_bind_hash &&
1397 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 939 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1398 tcp_put_port(sk); 940 inet_put_port(&tcp_hashinfo, sk);
1399 /* fall through */ 941 /* fall through */
1400 default: 942 default:
1401 if (oldstate==TCP_ESTABLISHED) 943 if (oldstate==TCP_ESTABLISHED)
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk)
1422 if (!sock_flag(sk, SOCK_DEAD)) 964 if (!sock_flag(sk, SOCK_DEAD))
1423 sk->sk_state_change(sk); 965 sk->sk_state_change(sk);
1424 else 966 else
1425 tcp_destroy_sock(sk); 967 inet_csk_destroy_sock(sk);
1426} 968}
1427 969
1428static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) 970static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk)
1524 return tcp_win_from_space(sk->sk_rcvbuf); 1066 return tcp_win_from_space(sk->sk_rcvbuf);
1525} 1067}
1526 1068
1527static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
1528 struct sock *child)
1529{
1530 reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
1531}
1532
1533static inline void
1534tcp_synq_removed(struct sock *sk, struct request_sock *req)
1535{
1536 if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
1537 tcp_delete_keepalive_timer(sk);
1538}
1539
1540static inline void tcp_synq_added(struct sock *sk)
1541{
1542 if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
1543 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1544}
1545
1546static inline int tcp_synq_len(struct sock *sk)
1547{
1548 return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
1549}
1550
1551static inline int tcp_synq_young(struct sock *sk)
1552{
1553 return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
1554}
1555
1556static inline int tcp_synq_is_full(struct sock *sk)
1557{
1558 return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
1559}
1560
1561static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
1562 struct request_sock **prev)
1563{
1564 reqsk_queue_unlink(&tp->accept_queue, req, prev);
1565}
1566
1567static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
1568 struct request_sock **prev)
1569{
1570 tcp_synq_unlink(tcp_sk(sk), req, prev);
1571 tcp_synq_removed(sk, req);
1572 reqsk_free(req);
1573}
1574
1575static __inline__ void tcp_openreq_init(struct request_sock *req, 1069static __inline__ void tcp_openreq_init(struct request_sock *req,
1576 struct tcp_options_received *rx_opt, 1070 struct tcp_options_received *rx_opt,
1577 struct sk_buff *skb) 1071 struct sk_buff *skb)
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
1593 1087
1594extern void tcp_enter_memory_pressure(void); 1088extern void tcp_enter_memory_pressure(void);
1595 1089
1596extern void tcp_listen_wlock(void);
1597
1598/* - We may sleep inside this lock.
1599 * - If sleeping is not required (or called from BH),
1600 * use plain read_(un)lock(&tcp_lhash_lock).
1601 */
1602
1603static inline void tcp_listen_lock(void)
1604{
1605 /* read_lock synchronizes to candidates to writers */
1606 read_lock(&tcp_lhash_lock);
1607 atomic_inc(&tcp_lhash_users);
1608 read_unlock(&tcp_lhash_lock);
1609}
1610
1611static inline void tcp_listen_unlock(void)
1612{
1613 if (atomic_dec_and_test(&tcp_lhash_users))
1614 wake_up(&tcp_lhash_wait);
1615}
1616
1617static inline int keepalive_intvl_when(const struct tcp_sock *tp) 1090static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1618{ 1091{
1619 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; 1092 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
1624 return tp->keepalive_time ? : sysctl_tcp_keepalive_time; 1097 return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1625} 1098}
1626 1099
1627static inline int tcp_fin_time(const struct tcp_sock *tp) 1100static inline int tcp_fin_time(const struct sock *sk)
1628{ 1101{
1629 int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; 1102 int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
1103 const int rto = inet_csk(sk)->icsk_rto;
1630 1104
1631 if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) 1105 if (fin_timeout < (rto << 2) - (rto >> 1))
1632 fin_timeout = (tp->rto<<2) - (tp->rto>>1); 1106 fin_timeout = (rto << 2) - (rto >> 1);
1633 1107
1634 return fin_timeout; 1108 return fin_timeout;
1635} 1109}
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
1658 return 1; 1132 return 1;
1659} 1133}
1660 1134
1661static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
1662{
1663 sk->sk_route_caps = dst->dev->features;
1664 if (sk->sk_route_caps & NETIF_F_TSO) {
1665 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
1666 sk->sk_route_caps &= ~NETIF_F_TSO;
1667 }
1668}
1669
1670#define TCP_CHECK_TIMER(sk) do { } while (0) 1135#define TCP_CHECK_TIMER(sk) do { } while (0)
1671 1136
1672static inline int tcp_use_frto(const struct sock *sk) 1137static inline int tcp_use_frto(const struct sock *sk)
@@ -1718,4 +1183,16 @@ struct tcp_iter_state {
1718extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); 1183extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
1719extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); 1184extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
1720 1185
1186extern struct request_sock_ops tcp_request_sock_ops;
1187
1188extern int tcp_v4_destroy_sock(struct sock *sk);
1189
1190#ifdef CONFIG_PROC_FS
1191extern int tcp4_proc_init(void);
1192extern void tcp4_proc_exit(void);
1193#endif
1194
1195extern void tcp_v4_init(struct net_proto_family *ops);
1196extern void tcp_init(void);
1197
1721#endif /* _TCP_H */ 1198#endif /* _TCP_H */