author	Eric Dumazet <dada1@cosmosbay.com>	2006-11-16 05:30:37 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2006-12-03 00:21:44 -0500
commit	72a3effaf633bcae9034b7e176bdbd78d64a71db (patch)
tree	b7a331527f1b15335a358f97809134f35587e57a /net/core
parent	3c62f75aac7348ee262b1295cfcfeb3473f76815 (diff)
[NET]: Size listen hash tables using backlog hint
We currently allocate a fixed-size hash table (TCP_SYNQ_HSIZE = 512 slots) for
each LISTEN socket, regardless of various parameters (the listen backlog, for
example).
On x86_64, this means order-1 allocations (which might fail), even for 'small'
sockets expecting few connections. Conversely, a huge server wanting a
backlog of 50000 is slowed down a bit because of this fixed limit.
This patch makes the size of the listen hash table a dynamic parameter,
depending on:
- the net.core.somaxconn tunable (default: 128)
- the net.ipv4.tcp_max_syn_backlog tunable (default: 256, adjusted to 1024 or 128 depending on machine memory)
- the backlog value given by the user application (second parameter of listen()); the sketch below shows how these combine
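
To make the resulting table size concrete, here is a minimal userspace sketch
of the clamping chain this patch introduces. The tunable values, the fold-in of
sys_listen()'s somaxconn cap, and the roundup_pow2() helper are stand-ins for
the kernel's own, used only for illustration:

#include <stdio.h>

static unsigned int somaxconn = 128;        /* net.core.somaxconn (assumed default) */
static unsigned int max_syn_backlog = 1024; /* net.ipv4.tcp_max_syn_backlog (assumed) */

/* Stand-in for the kernel's roundup_pow_of_two(). */
static unsigned int roundup_pow2(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/* Mirrors the clamping chain: sys_listen() caps the backlog at somaxconn,
 * then reqsk_queue_alloc() applies min_t/max_t/roundup_pow_of_two(). */
static unsigned int syn_table_entries(unsigned int backlog)
{
	unsigned int n = backlog;

	if (n > somaxconn)
		n = somaxconn;
	if (n > max_syn_backlog)
		n = max_syn_backlog;
	if (n < 8)
		n = 8;
	return roundup_pow2(n + 1);
}

int main(void)
{
	printf("listen(fd, 5)     -> %u slots\n", syn_table_entries(5));
	printf("listen(fd, 128)   -> %u slots\n", syn_table_entries(128));
	somaxconn = max_syn_backlog = 50000;
	printf("listen(fd, 50000) -> %u slots\n", syn_table_entries(50000));
	return 0;
}

Compiled and run, this prints 16, 256 and 65536 slots respectively, versus the
old unconditional 512.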
For large allocations (larger than PAGE_SIZE), we use vmalloc() instead of
kmalloc().
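
A detail worth noting in the diff below is that the free path recomputes
lopt_size so it can pick the deallocator that matches the allocator. Here is a
hedged userspace analogue of that pattern, with calloc()/free() standing in for
kzalloc()/kfree() and mmap()/munmap() for __vmalloc()/vfree(); the names are
illustrative, not kernel API:

#include <stdlib.h>
#include <sys/mman.h>

/* Small tables come from the heap, large ones from whole pages,
 * mirroring the kzalloc()/__vmalloc() split in this patch. */
static void *table_alloc(size_t size, size_t page_size)
{
	if (size > page_size) {
		void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		return p == MAP_FAILED ? NULL : p;  /* zeroed, like __GFP_ZERO */
	}
	return calloc(1, size);                     /* zeroed, like kzalloc() */
}

/* The free side recomputes the same size threshold to choose the matching
 * deallocator, just as reqsk_queue_destroy() recomputes lopt_size before
 * choosing vfree() vs kfree(). */
static void table_free(void *p, size_t size, size_t page_size)
{
	if (size > page_size)
		munmap(p, size);
	else
		free(p);
}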
We still limit memory allocation with the two existing tunables (somaxconn &
tcp_max_syn_backlog), so for standard setups this patch actually reduces RAM
usage.
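
To put numbers on that (assuming 8-byte pointers and 4 KiB pages, as on
x86_64): the old fixed table needed 512 * 8 = 4096 bytes of slot pointers plus
struct listen_sock, exceeding one page and forcing an order-1 allocation.
After this patch, a socket listening with the default backlog of 128 gets
roundup_pow_of_two(128 + 1) = 256 slots, i.e. 256 * 8 = 2048 bytes of pointers
plus the header, which fits comfortably in a single kmalloc'ed page.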
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
 net/core/request_sock.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 79ebd75fbe4d..5f0818d815e6 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/vmalloc.h>
 
 #include <net/request_sock.h>
 
@@ -29,22 +30,31 @@
  * it is absolutely not enough even at 100conn/sec. 256 cures most
  * of problems. This value is adjusted to 128 for very small machines
  * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
- * Further increasing requires to change hash table size.
+ * Note : Dont forget somaxconn that may limit backlog too.
  */
 int sysctl_max_syn_backlog = 256;
 
 int reqsk_queue_alloc(struct request_sock_queue *queue,
-		      const int nr_table_entries)
+		      unsigned int nr_table_entries)
 {
-	const int lopt_size = sizeof(struct listen_sock) +
-		nr_table_entries * sizeof(struct request_sock *);
-	struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL);
-
+	size_t lopt_size = sizeof(struct listen_sock);
+	struct listen_sock *lopt;
+
+	nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
+	nr_table_entries = max_t(u32, nr_table_entries, 8);
+	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
+	lopt_size += nr_table_entries * sizeof(struct request_sock *);
+	if (lopt_size > PAGE_SIZE)
+		lopt = __vmalloc(lopt_size,
+			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+			PAGE_KERNEL);
+	else
+		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
 		return -ENOMEM;
 
-	for (lopt->max_qlen_log = 6;
-	     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+	for (lopt->max_qlen_log = 3;
+	     (1 << lopt->max_qlen_log) < nr_table_entries;
 	     lopt->max_qlen_log++);
 
 	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
@@ -65,9 +75,11 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 {
 	/* make all the listen_opt local to us */
 	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+	size_t lopt_size = sizeof(struct listen_sock) +
+		lopt->nr_table_entries * sizeof(struct request_sock *);
 
 	if (lopt->qlen != 0) {
-		int i;
+		unsigned int i;
 
 		for (i = 0; i < lopt->nr_table_entries; i++) {
 			struct request_sock *req;
@@ -81,7 +93,10 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 	}
 
 	BUG_TRAP(lopt->qlen == 0);
-	kfree(lopt);
+	if (lopt_size > PAGE_SIZE)
+		vfree(lopt);
+	else
+		kfree(lopt);
 }
 
 EXPORT_SYMBOL(reqsk_queue_destroy);