From 8728b834b226ffcf2c94a58530090e292af2a7bf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Aug 2005 19:25:21 -0700
Subject: [NET]: Kill skb->list

Remove the "list" member of struct sk_buff, as it is entirely
redundant.  All SKB list removal callers know which list the
SKB is on, so storing this in sk_buff does nothing other than
taking up some space.

Two tricky bits were SCTP, which I took care of, and two ATM
drivers which Francois Romieu <romieu@fr.zoreil.com> fixed
up.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 69b1fcf70077..d2696af46c70 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -975,7 +975,7 @@ do_fault:
 	if (!skb->len) {
 		if (sk->sk_send_head == skb)
 			sk->sk_send_head = NULL;
-		__skb_unlink(skb, skb->list);
+		__skb_unlink(skb, &sk->sk_write_queue);
 		sk_stream_free_skb(sk, skb);
 	}
 
-- 
cgit v1.2.2


From 83e3609eba3818f6e18b8bf9442195169ac306b7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 19:33:31 -0700
Subject: [REQSK]: Move the syn_table destroy from tcp_listen_stop to
 reqsk_queue_destroy

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 35 ++++++++++-------------------------
 1 file changed, 10 insertions(+), 25 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d2696af46c70..42a2e2ccd430 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -487,7 +487,7 @@ int tcp_listen_start(struct sock *sk)
 	}
 
 	sk->sk_state = TCP_CLOSE;
-	reqsk_queue_destroy(&tp->accept_queue);
+	__reqsk_queue_destroy(&tp->accept_queue);
 	return -EADDRINUSE;
 }
 
@@ -499,38 +499,23 @@ int tcp_listen_start(struct sock *sk)
 static void tcp_listen_stop (struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct listen_sock *lopt;
 	struct request_sock *acc_req;
 	struct request_sock *req;
-	int i;
 
 	tcp_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
 	acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
 
-	if (lopt->qlen) {
-		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
-			while ((req = lopt->syn_table[i]) != NULL) {
-				lopt->syn_table[i] = req->dl_next;
-				lopt->qlen--;
-				reqsk_free(req);
-
-		/* Following specs, it would be better either to send FIN
-		 * (and enter FIN-WAIT-1, it is normal close)
-		 * or to send active reset (abort).
-		 * Certainly, it is pretty dangerous while synflood, but it is
-		 * bad justification for our negligence 8)
-		 * To be honest, we are not able to make either
-		 * of the variants now.			--ANK
-		 */
-			}
-		}
-	}
-	BUG_TRAP(!lopt->qlen);
-
-	kfree(lopt);
+	/* Following specs, it would be better either to send FIN
+	 * (and enter FIN-WAIT-1, it is normal close)
+	 * or to send active reset (abort).
+	 * Certainly, it is pretty dangerous while synflood, but it is
+	 * bad justification for our negligence 8)
+	 * To be honest, we are not able to make either
+	 * of the variants now.			--ANK
+	 */
+	reqsk_queue_destroy(&tp->accept_queue);
 
 	while ((req = acc_req) != NULL) {
 		struct sock *child = req->sk;
-- 
cgit v1.2.2


From e6848976b721eeb5551cd94673faafeef78d9f35 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 19:45:38 -0700
Subject: [NET]: Cleanup INET_REFCNT_DEBUG code

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 42a2e2ccd430..20159a3dafb3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1580,12 +1580,7 @@ void tcp_destroy_sock(struct sock *sk)
 
 	xfrm_sk_free_policy(sk);
 
-#ifdef INET_REFCNT_DEBUG
-	if (atomic_read(&sk->sk_refcnt) != 1) {
-		printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
-		       sk, atomic_read(&sk->sk_refcnt));
-	}
-#endif
+	sk_refcnt_debug_release(sk);
 
 	atomic_dec(&tcp_orphan_count);
 	sock_put(sk);
-- 
cgit v1.2.2


From 0f7ff9274e72fd254fbd1ab117bbc1db6e7cdb34 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 19:59:44 -0700
Subject: [INET]: Just rename the TCP hashtable functions/structs to inet_

This is to break down the complexity of the series of patches,
making it very clear that this one just does:

1. renames tcp_ prefixed hashtable functions and data structures that
   were already mostly generic to inet_ to share it with DCCP and
   other INET transport protocols.

2. Removes not used functions (__tb_head & tb_head)

3. Removes some leftover prototypes in the headers (tcp_bucket_unlock &
   tcp_v4_build_header)

Next changesets will move tcp_sk(sk)->bind_hash to inet_sock so that we can
make functions such as tcp_inherit_port, __tcp_inherit_port, tcp_v4_get_port,
__tcp_put_port,  generic and get others like tcp_destroy_sock closer to generic
(tcp_orphan_count will go to sk->sk_prot to allow this).

Eventually most of these functions will be used passing the transport protocol
inet_hashinfo structure.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 20159a3dafb3..1ec03db7dcd9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -272,6 +272,9 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
 kmem_cache_t *tcp_bucket_cachep;
+
+EXPORT_SYMBOL_GPL(tcp_bucket_cachep);
+
 kmem_cache_t *tcp_timewait_cachep;
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
@@ -2259,7 +2262,7 @@ void __init tcp_init(void)
 					   sizeof(skb->cb));
 
 	tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
-					      sizeof(struct tcp_bind_bucket),
+					      sizeof(struct inet_bind_bucket),
 					      0, SLAB_HWCACHE_ALIGN,
 					      NULL, NULL);
 	if (!tcp_bucket_cachep)
@@ -2277,9 +2280,9 @@ void __init tcp_init(void)
 	 *
 	 * The methodology is similar to that of the buffer cache.
 	 */
-	tcp_ehash = (struct tcp_ehash_bucket *)
+	tcp_ehash =
 		alloc_large_system_hash("TCP established",
-					sizeof(struct tcp_ehash_bucket),
+					sizeof(struct inet_ehash_bucket),
 					thash_entries,
 					(num_physpages >= 128 * 1024) ?
 						(25 - PAGE_SHIFT) :
@@ -2294,9 +2297,9 @@ void __init tcp_init(void)
 		INIT_HLIST_HEAD(&tcp_ehash[i].chain);
 	}
 
-	tcp_bhash = (struct tcp_bind_hashbucket *)
+	tcp_bhash =
 		alloc_large_system_hash("TCP bind",
-					sizeof(struct tcp_bind_hashbucket),
+					sizeof(struct inet_bind_hashbucket),
 					tcp_ehash_size,
 					(num_physpages >= 128 * 1024) ?
 						(25 - PAGE_SHIFT) :
@@ -2315,7 +2318,7 @@ void __init tcp_init(void)
 	 * on available memory.
 	 */
 	for (order = 0; ((1 << order) << PAGE_SHIFT) <
-			(tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
+			(tcp_bhash_size * sizeof(struct inet_bind_hashbucket));
 			order++)
 		;
 	if (order >= 4) {
-- 
cgit v1.2.2


From a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:01:14 -0700
Subject: [INET]: Move bind_hash from tcp_sk to inet_sk

This should really be in a inet_connection_sock, but I'm leaving it
for a later optimization, when some more fields common to INET
transport protocols now in tcp_sk or inet_sk will be chunked out into
inet_connection_sock, for now its better to concentrate on getting the
changes in the core merged to leave the DCCP tree with only DCCP
specific code.

Next changesets will take advantage of this move to generalise things
like tcp_bind_hash, tcp_put_port, tcp_inherit_port, making the later
receive a inet_hashinfo parameter, and even __tcp_tw_hashdance, etc in
the future, when tcp_tw_bucket gets transformed into the struct
timewait_sock hierarchy.

tcp_destroy_sock also is eligible as soon as tcp_orphan_count gets
moved to sk_prot.

A cascade of incremental changes will ultimately make the tcp_lookup
functions be fully generic.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ec03db7dcd9..e54a410ca701 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1575,7 +1575,7 @@ void tcp_destroy_sock(struct sock *sk)
 	BUG_TRAP(sk_unhashed(sk));
 
 	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash);
+	BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash);
 
 	sk->sk_prot->destroy(sk);
 
@@ -1802,7 +1802,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_sack_reset(&tp->rx_opt);
 	__sk_dst_reset(sk);
 
-	BUG_TRAP(!inet->num || tp->bind_hash);
+	BUG_TRAP(!inet->num || inet->bind_hash);
 
 	sk->sk_error_report(sk);
 	return err;
-- 
cgit v1.2.2


From 2d8c4ce51903636ce0f60addc8134aa50ab8fa76 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:07:13 -0700
Subject: [INET]: Generalise tcp_bind_hash & tcp_inherit_port

This required moving tcp_bucket_cachep to inet_hashinfo.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e54a410ca701..38c04c1a754c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,10 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
-kmem_cache_t *tcp_bucket_cachep;
-
-EXPORT_SYMBOL_GPL(tcp_bucket_cachep);
-
 kmem_cache_t *tcp_timewait_cachep;
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
-- 
cgit v1.2.2


From 6e04e02165a7209a71db553b7bc48d68421e5ebf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:07:35 -0700
Subject: [INET]: Move tcp_port_rover to inet_hashinfo

Also expose all of the tcp_hashinfo members, i.e. killing those
tcp_ehash, etc macros, this will more clearly expose already generic
functions and some that need just a bit of work to become generic, as
we'll see in the upcoming changesets.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 38c04c1a754c..2f4b1a374bb7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2257,11 +2257,11 @@ void __init tcp_init(void)
 		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
 					   sizeof(skb->cb));
 
-	tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
-					      sizeof(struct inet_bind_bucket),
-					      0, SLAB_HWCACHE_ALIGN,
-					      NULL, NULL);
-	if (!tcp_bucket_cachep)
+	tcp_hashinfo.bind_bucket_cachep =
+		kmem_cache_create("tcp_bind_bucket",
+				  sizeof(struct inet_bind_bucket), 0,
+				  SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!tcp_hashinfo.bind_bucket_cachep)
 		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
 
 	tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
@@ -2276,7 +2276,7 @@ void __init tcp_init(void)
 	 *
 	 * The methodology is similar to that of the buffer cache.
 	 */
-	tcp_ehash =
+	tcp_hashinfo.ehash =
 		alloc_large_system_hash("TCP established",
 					sizeof(struct inet_ehash_bucket),
 					thash_entries,
@@ -2284,37 +2284,37 @@ void __init tcp_init(void)
 						(25 - PAGE_SHIFT) :
 						(27 - PAGE_SHIFT),
 					HASH_HIGHMEM,
-					&tcp_ehash_size,
+					&tcp_hashinfo.ehash_size,
 					NULL,
 					0);
-	tcp_ehash_size = (1 << tcp_ehash_size) >> 1;
-	for (i = 0; i < (tcp_ehash_size << 1); i++) {
-		rwlock_init(&tcp_ehash[i].lock);
-		INIT_HLIST_HEAD(&tcp_ehash[i].chain);
+	tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1;
+	for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) {
+		rwlock_init(&tcp_hashinfo.ehash[i].lock);
+		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
 	}
 
-	tcp_bhash =
+	tcp_hashinfo.bhash =
 		alloc_large_system_hash("TCP bind",
 					sizeof(struct inet_bind_hashbucket),
-					tcp_ehash_size,
+					tcp_hashinfo.ehash_size,
 					(num_physpages >= 128 * 1024) ?
 						(25 - PAGE_SHIFT) :
 						(27 - PAGE_SHIFT),
 					HASH_HIGHMEM,
-					&tcp_bhash_size,
+					&tcp_hashinfo.bhash_size,
 					NULL,
 					64 * 1024);
-	tcp_bhash_size = 1 << tcp_bhash_size;
-	for (i = 0; i < tcp_bhash_size; i++) {
-		spin_lock_init(&tcp_bhash[i].lock);
-		INIT_HLIST_HEAD(&tcp_bhash[i].chain);
+	tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
+	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
+		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
+		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
 	}
 
 	/* Try to be a bit smarter and adjust defaults depending
 	 * on available memory.
 	 */
 	for (order = 0; ((1 << order) << PAGE_SHIFT) <
-			(tcp_bhash_size * sizeof(struct inet_bind_hashbucket));
+			(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
 			order++)
 		;
 	if (order >= 4) {
@@ -2329,7 +2329,7 @@ void __init tcp_init(void)
 		sysctl_tcp_max_orphans >>= (3 - order);
 		sysctl_max_syn_backlog = 128;
 	}
-	tcp_port_rover = sysctl_local_port_range[0] - 1;
+	tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
 
 	sysctl_tcp_mem[0] =  768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
@@ -2344,7 +2344,7 @@ void __init tcp_init(void)
 
 	printk(KERN_INFO "TCP: Hash tables configured "
 	       "(established %d bind %d)\n",
-	       tcp_ehash_size << 1, tcp_bhash_size);
+	       tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
 }
-- 
cgit v1.2.2


From 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:09:30 -0700
Subject: [INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets

This paves the way to generalise the rest of the sock ID lookup
routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro
kernels (where IPv6 is always built as a module):

[root@qemu ~]# grep tw_sock /proc/slabinfo
tw_sock_TCPv6  0  0  128  31  1
tw_sock_TCP    0  0   96  41  1
[root@qemu ~]#

Now if a protocol wants to use the TIME_WAIT generic infrastructure it
only has to set the sk_prot->twsk_obj_size field with the size of its
inet_timewait_sock derived sock and proto_register will create
sk_prot->twsk_slab, for now its only for INET sockets, but we can
introduce timewait_sock later if some non INET transport protocolo
wants to use this stuff.

Next changesets will take advantage of this new infrastructure to
generalise even more TCP code.

[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size
/tmp/before.size: 188646   11764    5068  205478   322a6 net/ipv4/built-in.o
/tmp/after.size:  188144   11764    5068  204976   320b0 net/ipv4/built-in.o
[acme@toy net-2.6.14]$

Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1
(qemu host)).

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2f4b1a374bb7..f1a708bf7a97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,8 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
-kmem_cache_t *tcp_timewait_cachep;
-
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
 int sysctl_tcp_mem[3];
@@ -2264,13 +2262,6 @@ void __init tcp_init(void)
 	if (!tcp_hashinfo.bind_bucket_cachep)
 		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
 
-	tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
-						sizeof(struct tcp_tw_bucket),
-						0, SLAB_HWCACHE_ALIGN,
-						NULL, NULL);
-	if (!tcp_timewait_cachep)
-		panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
-
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
 	 *
@@ -2363,4 +2354,3 @@ EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
 EXPORT_SYMBOL(tcp_statistics);
-EXPORT_SYMBOL(tcp_timewait_cachep);
-- 
cgit v1.2.2


From 463c84b97f24010a67cd871746d6a7e4c925a5f9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:10:42 -0700
Subject: [NET]: Introduce inet_connection_sock

This creates struct inet_connection_sock, moving members out of struct
tcp_sock that are shareable with other INET connection oriented
protocols, such as DCCP, that in my private tree already uses most of
these members.

The functions that operate on these members were renamed, using a
inet_csk_ prefix while not being moved yet to a new file, so as to
ease the review of these changes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 90 ++++++++++++++++++++++++++++++----------------------------
 1 file changed, 47 insertions(+), 43 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1a708bf7a97..8177b86570db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -313,7 +313,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
 static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
 					       poll_table *wait)
 {
-	return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
+	return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (POLLIN | POLLRDNORM) : 0;
 }
 
 /*
@@ -458,15 +458,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 int tcp_listen_start(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE);
 
 	if (rc != 0)
 		return rc;
 
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
-	tcp_delack_init(tp);
+	inet_csk_delack_init(sk);
 
 	/* There is race window here: we announce ourselves listening,
 	 * but this transition is still not validated by get_port().
@@ -484,7 +484,7 @@ int tcp_listen_start(struct sock *sk)
 	}
 
 	sk->sk_state = TCP_CLOSE;
-	__reqsk_queue_destroy(&tp->accept_queue);
+	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
 	return -EADDRINUSE;
 }
 
@@ -495,14 +495,14 @@ int tcp_listen_start(struct sock *sk)
 
 static void tcp_listen_stop (struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock *acc_req;
 	struct request_sock *req;
 
-	tcp_delete_keepalive_timer(sk);
+	inet_csk_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
+	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
 
 	/* Following specs, it would be better either to send FIN
 	 * (and enter FIN-WAIT-1, it is normal close)
@@ -512,7 +512,7 @@ static void tcp_listen_stop (struct sock *sk)
 	 * To be honest, we are not able to make either
 	 * of the variants now.			--ANK
 	 */
-	reqsk_queue_destroy(&tp->accept_queue);
+	reqsk_queue_destroy(&icsk->icsk_accept_queue);
 
 	while ((req = acc_req) != NULL) {
 		struct sock *child = req->sk;
@@ -1039,20 +1039,21 @@ static void cleanup_rbuf(struct sock *sk, int copied)
 	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
 #endif
 
-	if (tcp_ack_scheduled(tp)) {
+	if (inet_csk_ack_scheduled(sk)) {
+		const struct inet_connection_sock *icsk = inet_csk(sk);
 		   /* Delayed ACKs frequently hit locked sockets during bulk
 		    * receive. */
-		if (tp->ack.blocked ||
+		if (icsk->icsk_ack.blocked ||
 		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
-		    tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss ||
+		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
 		    /*
 		     * If this read emptied read buffer, we send ACK, if
 		     * connection is not bidirectional, user drained
 		     * receive buffer and there was a small segment
 		     * in queue.
 		     */
-		    (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) &&
-		     !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+		    (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+		     !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = 1;
 	}
 
@@ -1569,7 +1570,7 @@ void tcp_destroy_sock(struct sock *sk)
 	BUG_TRAP(sk_unhashed(sk));
 
 	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash);
+	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
 
 	sk->sk_prot->destroy(sk);
 
@@ -1698,10 +1699,10 @@ adjudge_to_death:
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
 		} else {
-			int tmo = tcp_fin_time(tp);
+			const int tmo = tcp_fin_time(sk);
 
 			if (tmo > TCP_TIMEWAIT_LEN) {
-				tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
+				inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
 			} else {
 				atomic_inc(&tcp_orphan_count);
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
@@ -1746,6 +1747,7 @@ static inline int tcp_need_reset(int state)
 int tcp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int err = 0;
 	int old_state = sk->sk_state;
@@ -1782,7 +1784,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->srtt = 0;
 	if ((tp->write_seq += tp->max_window + 2) == 0)
 		tp->write_seq = 1;
-	tp->backoff = 0;
+	icsk->icsk_backoff = 0;
 	tp->snd_cwnd = 2;
 	tp->probes_out = 0;
 	tp->packets_out = 0;
@@ -1790,13 +1792,13 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->snd_cwnd_cnt = 0;
 	tcp_set_ca_state(tp, TCP_CA_Open);
 	tcp_clear_retrans(tp);
-	tcp_delack_init(tp);
+	inet_csk_delack_init(sk);
 	sk->sk_send_head = NULL;
 	tp->rx_opt.saw_tstamp = 0;
 	tcp_sack_reset(&tp->rx_opt);
 	__sk_dst_reset(sk);
 
-	BUG_TRAP(!inet->num || inet->bind_hash);
+	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
 
 	sk->sk_error_report(sk);
 	return err;
@@ -1808,7 +1810,7 @@ int tcp_disconnect(struct sock *sk, int flags)
  */
 static int wait_for_connect(struct sock *sk, long timeo)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	DEFINE_WAIT(wait);
 	int err;
 
@@ -1830,11 +1832,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
 		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
-		if (reqsk_queue_empty(&tp->accept_queue))
+		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
 			timeo = schedule_timeout(timeo);
 		lock_sock(sk);
 		err = 0;
-		if (!reqsk_queue_empty(&tp->accept_queue))
+		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
 			break;
 		err = -EINVAL;
 		if (sk->sk_state != TCP_LISTEN)
@@ -1854,9 +1856,9 @@ static int wait_for_connect(struct sock *sk, long timeo)
  *	This will accept the next outstanding connection.
  */
 
-struct sock *tcp_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *newsk;
 	int error;
 
@@ -1870,7 +1872,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 		goto out_err;
 
 	/* Find already established connection */
-	if (reqsk_queue_empty(&tp->accept_queue)) {
+	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 		/* If this is a non blocking socket don't sleep */
@@ -1883,7 +1885,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 			goto out_err;
 	}
 
-	newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
+	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
 	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
 out:
 	release_sock(sk);
@@ -1901,6 +1903,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		   int optlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int val;
 	int err = 0;
 
@@ -1999,7 +2002,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 					elapsed = tp->keepalive_time - elapsed;
 				else
 					elapsed = 0;
-				tcp_reset_keepalive_timer(sk, elapsed);
+				inet_csk_reset_keepalive_timer(sk, elapsed);
 			}
 		}
 		break;
@@ -2019,7 +2022,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		if (val < 1 || val > MAX_TCP_SYNCNT)
 			err = -EINVAL;
 		else
-			tp->syn_retries = val;
+			icsk->icsk_syn_retries = val;
 		break;
 
 	case TCP_LINGER2:
@@ -2058,16 +2061,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 
 	case TCP_QUICKACK:
 		if (!val) {
-			tp->ack.pingpong = 1;
+			icsk->icsk_ack.pingpong = 1;
 		} else {
-			tp->ack.pingpong = 0;
+			icsk->icsk_ack.pingpong = 0;
 			if ((1 << sk->sk_state) &
 			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
-			    tcp_ack_scheduled(tp)) {
-				tp->ack.pending |= TCP_ACK_PUSHED;
+			    inet_csk_ack_scheduled(sk)) {
+				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
 				cleanup_rbuf(sk, 1);
 				if (!(val & 1))
-					tp->ack.pingpong = 1;
+					icsk->icsk_ack.pingpong = 1;
 			}
 		}
 		break;
@@ -2084,15 +2087,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
 
 	memset(info, 0, sizeof(*info));
 
 	info->tcpi_state = sk->sk_state;
 	info->tcpi_ca_state = tp->ca_state;
-	info->tcpi_retransmits = tp->retransmits;
+	info->tcpi_retransmits = icsk->icsk_retransmits;
 	info->tcpi_probes = tp->probes_out;
-	info->tcpi_backoff = tp->backoff;
+	info->tcpi_backoff = icsk->icsk_backoff;
 
 	if (tp->rx_opt.tstamp_ok)
 		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
@@ -2107,10 +2111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	if (tp->ecn_flags&TCP_ECN_OK)
 		info->tcpi_options |= TCPI_OPT_ECN;
 
-	info->tcpi_rto = jiffies_to_usecs(tp->rto);
-	info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
+	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
+	info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
 	info->tcpi_snd_mss = tp->mss_cache;
-	info->tcpi_rcv_mss = tp->ack.rcv_mss;
+	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
 	info->tcpi_unacked = tp->packets_out;
 	info->tcpi_sacked = tp->sacked_out;
@@ -2119,7 +2123,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_fackets = tp->fackets_out;
 
 	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
-	info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime);
+	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
 	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
 
 	info->tcpi_pmtu = tp->pmtu_cookie;
@@ -2179,7 +2183,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
 		break;
 	case TCP_SYNCNT:
-		val = tp->syn_retries ? : sysctl_tcp_syn_retries;
+		val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 		break;
 	case TCP_LINGER2:
 		val = tp->linger2;
@@ -2209,7 +2213,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		return 0;
 	}
 	case TCP_QUICKACK:
-		val = !tp->ack.pingpong;
+		val = !inet_csk(sk)->icsk_ack.pingpong;
 		break;
 
 	case TCP_CONGESTION:
@@ -2340,7 +2344,7 @@ void __init tcp_init(void)
 	tcp_register_congestion_control(&tcp_reno);
 }
 
-EXPORT_SYMBOL(tcp_accept);
+EXPORT_SYMBOL(inet_csk_accept);
 EXPORT_SYMBOL(tcp_close);
 EXPORT_SYMBOL(tcp_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
-- 
cgit v1.2.2


From 3f421baa4720b708022f8bcc52a61e5cd6f10bf8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:11:08 -0700
Subject: [NET]: Just move the inet_connection_sock function from tcp sources

Completing the previous changeset, this also generalises tcp_v4_synq_add,
renaming it to inet_csk_reqsk_queue_hash_add, already geing used in the
DCCP tree, which I plan to merge RSN.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 93 ----------------------------------------------------------
 1 file changed, 93 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8177b86570db..581016a6a93f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1804,98 +1804,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	return err;
 }
 
-/*
- *	Wait for an incoming connection, avoid race
- *	conditions. This must be called with the socket locked.
- */
-static int wait_for_connect(struct sock *sk, long timeo)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	DEFINE_WAIT(wait);
-	int err;
-
-	/*
-	 * True wake-one mechanism for incoming connections: only
-	 * one process gets woken up, not the 'whole herd'.
-	 * Since we do not 'race & poll' for established sockets
-	 * anymore, the common case will execute the loop only once.
-	 *
-	 * Subtle issue: "add_wait_queue_exclusive()" will be added
-	 * after any current non-exclusive waiters, and we know that
-	 * it will always _stay_ after any new non-exclusive waiters
-	 * because all non-exclusive waiters are added at the
-	 * beginning of the wait-queue. As such, it's ok to "drop"
-	 * our exclusiveness temporarily when we get woken up without
-	 * having to remove and re-insert us on the wait queue.
-	 */
-	for (;;) {
-		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
-					  TASK_INTERRUPTIBLE);
-		release_sock(sk);
-		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
-			timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-		err = 0;
-		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
-			break;
-		err = -EINVAL;
-		if (sk->sk_state != TCP_LISTEN)
-			break;
-		err = sock_intr_errno(timeo);
-		if (signal_pending(current))
-			break;
-		err = -EAGAIN;
-		if (!timeo)
-			break;
-	}
-	finish_wait(sk->sk_sleep, &wait);
-	return err;
-}
-
-/*
- *	This will accept the next outstanding connection.
- */
-
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sock *newsk;
-	int error;
-
-	lock_sock(sk);
-
-	/* We need to make sure that this socket is listening,
-	 * and that it has something pending.
-	 */
-	error = -EINVAL;
-	if (sk->sk_state != TCP_LISTEN)
-		goto out_err;
-
-	/* Find already established connection */
-	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
-		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
-
-		/* If this is a non blocking socket don't sleep */
-		error = -EAGAIN;
-		if (!timeo)
-			goto out_err;
-
-		error = wait_for_connect(sk, timeo);
-		if (error)
-			goto out_err;
-	}
-
-	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
-	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
-out:
-	release_sock(sk);
-	return newsk;
-out_err:
-	newsk = NULL;
-	*err = error;
-	goto out;
-}
-
 /*
  *	Socket option code for TCP.
  */
@@ -2344,7 +2252,6 @@ void __init tcp_init(void)
 	tcp_register_congestion_control(&tcp_reno);
 }
 
-EXPORT_SYMBOL(inet_csk_accept);
 EXPORT_SYMBOL(tcp_close);
 EXPORT_SYMBOL(tcp_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
-- 
cgit v1.2.2


From 0a5578cf8e5e045aaa68643c17ce885426697c6b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:11:41 -0700
Subject: [ICSK]: Generalise tcp_listen_{start,stop}

This also moved inet_iif from tcp to inet_hashtables.h, as it is
needed by the inet_lookup callers, perhaps this needs a bit of
polishing, but for now seems fine.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 581016a6a93f..a1f812159ced 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -273,6 +273,8 @@ DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
+EXPORT_SYMBOL_GPL(tcp_orphan_count);
+
 int sysctl_tcp_mem[3];
 int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
 int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -454,12 +456,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	return put_user(answ, (int __user *)arg);
 }
 
-
-int tcp_listen_start(struct sock *sk)
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE);
+	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
 
 	if (rc != 0)
 		return rc;
@@ -488,12 +489,13 @@ int tcp_listen_start(struct sock *sk)
 	return -EADDRINUSE;
 }
 
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
 /*
  *	This routine closes sockets which have been at least partially
  *	opened, but not yet accepted.
  */
-
-static void tcp_listen_stop (struct sock *sk)
+static void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock *acc_req;
@@ -524,13 +526,13 @@ static void tcp_listen_stop (struct sock *sk)
 		BUG_TRAP(!sock_owned_by_user(child));
 		sock_hold(child);
 
-		tcp_disconnect(child, O_NONBLOCK);
+		sk->sk_prot->disconnect(child, O_NONBLOCK);
 
 		sock_orphan(child);
 
-		atomic_inc(&tcp_orphan_count);
+		atomic_inc(sk->sk_prot->orphan_count);
 
-		tcp_destroy_sock(child);
+		inet_csk_destroy_sock(child);
 
 		bh_unlock_sock(child);
 		local_bh_enable();
@@ -542,6 +544,8 @@ static void tcp_listen_stop (struct sock *sk)
 	BUG_TRAP(!sk->sk_ack_backlog);
 }
 
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
+
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -1561,7 +1565,7 @@ void tcp_shutdown(struct sock *sk, int how)
  * can assume the socket waitqueue is inactive and nobody will
  * try to jump onto it.
  */
-void tcp_destroy_sock(struct sock *sk)
+void inet_csk_destroy_sock(struct sock *sk)
 {
 	BUG_TRAP(sk->sk_state == TCP_CLOSE);
 	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
@@ -1580,7 +1584,7 @@ void tcp_destroy_sock(struct sock *sk)
 
 	sk_refcnt_debug_release(sk);
 
-	atomic_dec(&tcp_orphan_count);
+	atomic_dec(sk->sk_prot->orphan_count);
 	sock_put(sk);
 }
 
@@ -1596,7 +1600,7 @@ void tcp_close(struct sock *sk, long timeout)
 		tcp_set_state(sk, TCP_CLOSE);
 
 		/* Special case. */
-		tcp_listen_stop(sk);
+		inet_csk_listen_stop(sk);
 
 		goto adjudge_to_death;
 	}
@@ -1704,7 +1708,7 @@ adjudge_to_death:
 			if (tmo > TCP_TIMEWAIT_LEN) {
 				inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
 			} else {
-				atomic_inc(&tcp_orphan_count);
+				atomic_inc(sk->sk_prot->orphan_count);
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 				goto out;
 			}
@@ -1712,7 +1716,7 @@ adjudge_to_death:
 	}
 	if (sk->sk_state != TCP_CLOSE) {
 		sk_stream_mem_reclaim(sk);
-		if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
+		if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
 		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
 		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
 			if (net_ratelimit())
@@ -1723,10 +1727,10 @@ adjudge_to_death:
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
-	atomic_inc(&tcp_orphan_count);
+	atomic_inc(sk->sk_prot->orphan_count);
 
 	if (sk->sk_state == TCP_CLOSE)
-		tcp_destroy_sock(sk);
+		inet_csk_destroy_sock(sk);
 	/* Otherwise, socket is reprieved until protocol close. */
 
 out:
@@ -1757,7 +1761,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	/* ABORT function of RFC793 */
 	if (old_state == TCP_LISTEN) {
-		tcp_listen_stop(sk);
+		inet_csk_listen_stop(sk);
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2253,7 +2257,7 @@ void __init tcp_init(void)
 }
 
 EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(tcp_destroy_sock);
+EXPORT_SYMBOL(inet_csk_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
-- 
cgit v1.2.2


From 295f7324ff8d9ea58b4d3ec93b1aaa1d80e048a9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:11:56 -0700
Subject: [ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer

With this we're very close to getting all of the current TCP
refactorings in my dccp-2.6 tree merged, next changeset will export
some functions needed by the current DCCP code and then dccp-2.6.git
will be born!

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a1f812159ced..a4e9eec44895 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
  *	This routine closes sockets which have been at least partially
  *	opened, but not yet accepted.
  */
-static void inet_csk_listen_stop(struct sock *sk)
+void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock *acc_req;
@@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		tp->defer_accept = 0;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
 		if (val > 0) {
 			/* Translate value in seconds to number of
 			 * retransmits */
-			while (tp->defer_accept < 32 &&
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
 			       val > ((TCP_TIMEOUT_INIT / HZ) <<
-				       tp->defer_accept))
-				tp->defer_accept++;
-			tp->defer_accept++;
+				       icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info);
 int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		   int __user *optlen)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int val, len;
 
@@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
 		break;
 	case TCP_SYNCNT:
-		val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+		val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 		break;
 	case TCP_LINGER2:
 		val = tp->linger2;
@@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) <<
-					       (tp->defer_accept - 1));
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
@@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		return 0;
 	}
 	case TCP_QUICKACK:
-		val = !inet_csk(sk)->icsk_ack.pingpong;
+		val = !icsk->icsk_ack.pingpong;
 		break;
 
 	case TCP_CONGESTION:
-- 
cgit v1.2.2


From a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Tue, 9 Aug 2005 20:15:09 -0700
Subject: [ICSK]: Move generalised functions from tcp to inet_connection_sock

This also improves reqsk_queue_prune and renames it to
inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock
and inet_request_sock objects, not just with request_sock ones thus
belonging to inet_request_sock.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 120 ---------------------------------------------------------
 1 file changed, 120 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a4e9eec44895..4bda522d25cf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -456,96 +456,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	return put_user(answ, (int __user *)arg);
 }
 
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
-
-	if (rc != 0)
-		return rc;
-
-	sk->sk_max_ack_backlog = 0;
-	sk->sk_ack_backlog = 0;
-	inet_csk_delack_init(sk);
-
-	/* There is race window here: we announce ourselves listening,
-	 * but this transition is still not validated by get_port().
-	 * It is OK, because this socket enters to hash table only
-	 * after validation is complete.
-	 */
-	sk->sk_state = TCP_LISTEN;
-	if (!sk->sk_prot->get_port(sk, inet->num)) {
-		inet->sport = htons(inet->num);
-
-		sk_dst_reset(sk);
-		sk->sk_prot->hash(sk);
-
-		return 0;
-	}
-
-	sk->sk_state = TCP_CLOSE;
-	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
-	return -EADDRINUSE;
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_start);
-
-/*
- *	This routine closes sockets which have been at least partially
- *	opened, but not yet accepted.
- */
-void inet_csk_listen_stop(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct request_sock *acc_req;
-	struct request_sock *req;
-
-	inet_csk_delete_keepalive_timer(sk);
-
-	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
-
-	/* Following specs, it would be better either to send FIN
-	 * (and enter FIN-WAIT-1, it is normal close)
-	 * or to send active reset (abort).
-	 * Certainly, it is pretty dangerous while synflood, but it is
-	 * bad justification for our negligence 8)
-	 * To be honest, we are not able to make either
-	 * of the variants now.			--ANK
-	 */
-	reqsk_queue_destroy(&icsk->icsk_accept_queue);
-
-	while ((req = acc_req) != NULL) {
-		struct sock *child = req->sk;
-
-		acc_req = req->dl_next;
-
-		local_bh_disable();
-		bh_lock_sock(child);
-		BUG_TRAP(!sock_owned_by_user(child));
-		sock_hold(child);
-
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		atomic_inc(sk->sk_prot->orphan_count);
-
-		inet_csk_destroy_sock(child);
-
-		bh_unlock_sock(child);
-		local_bh_enable();
-		sock_put(child);
-
-		sk_acceptq_removed(sk);
-		__reqsk_free(req);
-	}
-	BUG_TRAP(!sk->sk_ack_backlog);
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
-
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -1559,35 +1469,6 @@ void tcp_shutdown(struct sock *sk, int how)
 	}
 }
 
-/*
- * At this point, there should be no process reference to this
- * socket, and thus no user references at all.  Therefore we
- * can assume the socket waitqueue is inactive and nobody will
- * try to jump onto it.
- */
-void inet_csk_destroy_sock(struct sock *sk)
-{
-	BUG_TRAP(sk->sk_state == TCP_CLOSE);
-	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
-
-	/* It cannot be in hash table! */
-	BUG_TRAP(sk_unhashed(sk));
-
-	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
-
-	sk->sk_prot->destroy(sk);
-
-	sk_stream_kill_queues(sk);
-
-	xfrm_sk_free_policy(sk);
-
-	sk_refcnt_debug_release(sk);
-
-	atomic_dec(sk->sk_prot->orphan_count);
-	sock_put(sk);
-}
-
 void tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
@@ -2258,7 +2139,6 @@ void __init tcp_init(void)
 }
 
 EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(inet_csk_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
-- 
cgit v1.2.2


From 295ff7edb8f72b77d524759266f7524deae379b3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 9 Aug 2005 20:44:40 -0700
Subject: [TIMEWAIT]: Introduce inet_timewait_death_row

That groups all of the tables and variables associated to the TCP timewait
schedulling/recycling/killing code, that now can be isolated from the TCP
specific code and used by other transport protocols, such as DCCP.

Next changeset will move this code to net/ipv4/inet_timewait_sock.c

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bda522d25cf..0eed64a1991d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2109,12 +2109,12 @@ void __init tcp_init(void)
 	if (order >= 4) {
 		sysctl_local_port_range[0] = 32768;
 		sysctl_local_port_range[1] = 61000;
-		sysctl_tcp_max_tw_buckets = 180000;
+		tcp_death_row.sysctl_max_tw_buckets = 180000;
 		sysctl_tcp_max_orphans = 4096 << (order - 4);
 		sysctl_max_syn_backlog = 1024;
 	} else if (order < 3) {
 		sysctl_local_port_range[0] = 1024 * (3 - order);
-		sysctl_tcp_max_tw_buckets >>= (3 - order);
+		tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
 		sysctl_tcp_max_orphans >>= (3 - order);
 		sysctl_max_syn_backlog = 128;
 	}
-- 
cgit v1.2.2


From 6687e988d9aeaccad6774e6a8304f681f3ec0a03 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Wed, 10 Aug 2005 04:03:31 -0300
Subject: [ICSK]: Move TCP congestion avoidance members to icsk

This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(),
minimal renaming/moving done in this changeset to ease review.

Most of it is just changes of struct tcp_sock * to struct sock * parameters.

With this we move to a state closer to two interesting goals:

1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used
   for any INET transport protocol that has struct inet_hashinfo and are
   derived from struct inet_connection_sock. Keeps the userspace API, that will
   just not display DCCP sockets, while newer versions of tools can support
   DCCP.

2. INET generic transport pluggable Congestion Avoidance infrastructure, using
   the current TCP CA infrastructure with DCCP.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0eed64a1991d..02848e72e9c1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags)
 		tp->write_seq = 1;
 	icsk->icsk_backoff = 0;
 	tp->snd_cwnd = 2;
-	tp->probes_out = 0;
+	icsk->icsk_probes_out = 0;
 	tp->packets_out = 0;
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
-	tcp_set_ca_state(tp, TCP_CA_Open);
+	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
 	sk->sk_send_head = NULL;
@@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		name[val] = 0;
 
 		lock_sock(sk);
-		err = tcp_set_congestion_control(tp, name);
+		err = tcp_set_congestion_control(sk, name);
 		release_sock(sk);
 		return err;
 	}
@@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	memset(info, 0, sizeof(*info));
 
 	info->tcpi_state = sk->sk_state;
-	info->tcpi_ca_state = tp->ca_state;
+	info->tcpi_ca_state = icsk->icsk_ca_state;
 	info->tcpi_retransmits = icsk->icsk_retransmits;
-	info->tcpi_probes = tp->probes_out;
+	info->tcpi_probes = icsk->icsk_probes_out;
 	info->tcpi_backoff = icsk->icsk_backoff;
 
 	if (tp->rx_opt.tstamp_ok)
@@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
 		if (put_user(len, optlen))
 			return -EFAULT;
-		if (copy_to_user(optval, tp->ca_ops->name, len))
+		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
 	default:
-- 
cgit v1.2.2


From dc40c7bc76054f5e4382835ca2bafb895b993a8a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 23 Aug 2005 21:52:58 -0700
Subject: [ICSK]: Generalise tcp_listen_poll

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 02848e72e9c1..68626de6d69c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -309,15 +309,6 @@ void tcp_enter_memory_pressure(void)
 
 EXPORT_SYMBOL(tcp_enter_memory_pressure);
 
-/*
- * LISTEN is a special case for poll..
- */
-static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
-					       poll_table *wait)
-{
-	return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (POLLIN | POLLRDNORM) : 0;
-}
-
 /*
  *	Wait for a TCP event.
  *
@@ -333,7 +324,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	poll_wait(file, sk->sk_sleep, wait);
 	if (sk->sk_state == TCP_LISTEN)
-		return tcp_listen_poll(sk, wait);
+		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
 	   by poll logic and correct handling of state changes
-- 
cgit v1.2.2


From ba89966c1984513f4f2cc0a6c182266be44ddd03 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 26 Aug 2005 12:05:31 -0700
Subject: [NET]: use __read_mostly on kmem_cache_t , DEFINE_SNMP_STAT pointers

This patch puts mostly read only data in the right section
(read_mostly), to help sharing of these data between CPUS without
memory ping pongs.

On one of my production machine, tcp_statistics was sitting in a
heavily modified cache line, so *every* SNMP update had to force a
reload.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 68626de6d69c..02fdda68718d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,7 +269,7 @@
 
 int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
+DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
-- 
cgit v1.2.2