aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2009-07-30 21:52:15 -0400
committerDavid S. Miller <davem@davemloft.net>2009-07-30 21:52:15 -0400
commita33bc5c15154c835aae26f16e6a3a7d9ad4acb45 (patch)
treecf7683b1b2d2fd170bfc6650ea84a4b9a81eebbf
parent9aada7ac047f789ffb27540cc1695989897b2dfe (diff)
xfrm: select sane defaults for xfrm[4|6] gc_thresh
Choose saner defaults for xfrm[4|6] gc_thresh values on init Currently, the xfrm[4|6] code has hard-coded initial gc_thresh values (set to 1024). Given that the ipv4 and ipv6 routing caches are sized dynamically at boot time, the static selections can be non-sensical. This patch dynamically selects an appropriate gc threshold based on the corresponding main routing table size, using the assumption that we should in the worst case be able to handle as many connections as the routing table can. For ipv4, the maximum route cache size is 16 * the number of hash buckets in the route cache. Given that xfrm4 starts garbage collection at the gc_thresh and prevents new allocations at 2 * gc_thresh, we set gc_thresh to half the maximum route cache size. For ipv6, its a bit trickier. there is no maximum route cache size, but the ipv6 dst_ops gc_thresh is statically set to 1024. It seems sane to select a simmilar gc_thresh for the xfrm6 code that is half the number of hash buckets in the v6 route cache times 16 (like the v4 code does). Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip6_fib.h6
-rw-r--r--include/net/xfrm.h2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/xfrm4_policy.c13
-rw-r--r--net/ipv6/ip6_fib.c16
-rw-r--r--net/ipv6/xfrm6_policy.c15
6 files changed, 40 insertions, 14 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 7c5c0f79168a..15b492a9aa79 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -22,6 +22,12 @@
22#include <net/flow.h> 22#include <net/flow.h>
23#include <net/netlink.h> 23#include <net/netlink.h>
24 24
25#ifdef CONFIG_IPV6_MULTIPLE_TABLES
26#define FIB6_TABLE_HASHSZ 256
27#else
28#define FIB6_TABLE_HASHSZ 1
29#endif
30
25struct rt6_info; 31struct rt6_info;
26 32
27struct fib6_config 33struct fib6_config
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9e3a3f4c1f60..223e90a44824 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1280,7 +1280,7 @@ struct xfrm6_tunnel {
1280}; 1280};
1281 1281
1282extern void xfrm_init(void); 1282extern void xfrm_init(void);
1283extern void xfrm4_init(void); 1283extern void xfrm4_init(int rt_hash_size);
1284extern int xfrm_state_init(struct net *net); 1284extern int xfrm_state_init(struct net *net);
1285extern void xfrm_state_fini(struct net *net); 1285extern void xfrm_state_fini(struct net *net);
1286extern void xfrm4_state_init(void); 1286extern void xfrm4_state_init(void);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 278f46f5011b..fafbe163e2b5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3442,7 +3442,7 @@ int __init ip_rt_init(void)
3442 printk(KERN_ERR "Unable to create route proc files\n"); 3442 printk(KERN_ERR "Unable to create route proc files\n");
3443#ifdef CONFIG_XFRM 3443#ifdef CONFIG_XFRM
3444 xfrm_init(); 3444 xfrm_init();
3445 xfrm4_init(); 3445 xfrm4_init(ip_rt_max_size);
3446#endif 3446#endif
3447 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); 3447 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
3448 3448
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 26496babdf3a..1ba44742ebbf 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -290,10 +290,21 @@ static void __exit xfrm4_policy_fini(void)
290 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); 290 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
291} 291}
292 292
293void __init xfrm4_init(void) 293void __init xfrm4_init(int rt_max_size)
294{ 294{
295 xfrm4_state_init(); 295 xfrm4_state_init();
296 xfrm4_policy_init(); 296 xfrm4_policy_init();
297 /*
298 * Select a default value for the gc_thresh based on the main route
299 * table hash size. It seems to me the worst case scenario is when
300 * we have ipsec operating in transport mode, in which we create a
301 * dst_entry per socket. The xfrm gc algorithm starts trying to remove
302 * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
303 * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
304 * That will let us store an ipsec connection per route table entry,
305 * and start cleaning when were 1/2 full
306 */
307 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
297 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, 308 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
298 xfrm4_policy_table); 309 xfrm4_policy_table);
299} 310}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 52ee1dced2ff..0e93ca56eb69 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -164,12 +164,6 @@ static __inline__ void rt6_release(struct rt6_info *rt)
164 dst_free(&rt->u.dst); 164 dst_free(&rt->u.dst);
165} 165}
166 166
167#ifdef CONFIG_IPV6_MULTIPLE_TABLES
168#define FIB_TABLE_HASHSZ 256
169#else
170#define FIB_TABLE_HASHSZ 1
171#endif
172
173static void fib6_link_table(struct net *net, struct fib6_table *tb) 167static void fib6_link_table(struct net *net, struct fib6_table *tb)
174{ 168{
175 unsigned int h; 169 unsigned int h;
@@ -180,7 +174,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
180 */ 174 */
181 rwlock_init(&tb->tb6_lock); 175 rwlock_init(&tb->tb6_lock);
182 176
183 h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); 177 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
184 178
185 /* 179 /*
186 * No protection necessary, this is the only list mutatation 180 * No protection necessary, this is the only list mutatation
@@ -231,7 +225,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
231 225
232 if (id == 0) 226 if (id == 0)
233 id = RT6_TABLE_MAIN; 227 id = RT6_TABLE_MAIN;
234 h = id & (FIB_TABLE_HASHSZ - 1); 228 h = id & (FIB6_TABLE_HASHSZ - 1);
235 rcu_read_lock(); 229 rcu_read_lock();
236 head = &net->ipv6.fib_table_hash[h]; 230 head = &net->ipv6.fib_table_hash[h];
237 hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { 231 hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
@@ -382,7 +376,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
382 arg.net = net; 376 arg.net = net;
383 w->args = &arg; 377 w->args = &arg;
384 378
385 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 379 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
386 e = 0; 380 e = 0;
387 head = &net->ipv6.fib_table_hash[h]; 381 head = &net->ipv6.fib_table_hash[h];
388 hlist_for_each_entry(tb, node, head, tb6_hlist) { 382 hlist_for_each_entry(tb, node, head, tb6_hlist) {
@@ -1368,7 +1362,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
1368 unsigned int h; 1362 unsigned int h;
1369 1363
1370 rcu_read_lock(); 1364 rcu_read_lock();
1371 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 1365 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
1372 head = &net->ipv6.fib_table_hash[h]; 1366 head = &net->ipv6.fib_table_hash[h];
1373 hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { 1367 hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
1374 write_lock_bh(&table->tb6_lock); 1368 write_lock_bh(&table->tb6_lock);
@@ -1483,7 +1477,7 @@ static int fib6_net_init(struct net *net)
1483 if (!net->ipv6.rt6_stats) 1477 if (!net->ipv6.rt6_stats)
1484 goto out_timer; 1478 goto out_timer;
1485 1479
1486 net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, 1480 net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ,
1487 sizeof(*net->ipv6.fib_table_hash), 1481 sizeof(*net->ipv6.fib_table_hash),
1488 GFP_KERNEL); 1482 GFP_KERNEL);
1489 if (!net->ipv6.fib_table_hash) 1483 if (!net->ipv6.fib_table_hash)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4acc308eac7f..611cffcf554f 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -323,6 +323,7 @@ static struct ctl_table_header *sysctl_hdr;
323int __init xfrm6_init(void) 323int __init xfrm6_init(void)
324{ 324{
325 int ret; 325 int ret;
326 unsigned int gc_thresh;
326 327
327 ret = xfrm6_policy_init(); 328 ret = xfrm6_policy_init();
328 if (ret) 329 if (ret)
@@ -331,6 +332,20 @@ int __init xfrm6_init(void)
331 ret = xfrm6_state_init(); 332 ret = xfrm6_state_init();
332 if (ret) 333 if (ret)
333 goto out_policy; 334 goto out_policy;
335 /*
336 * We need a good default value for the xfrm6 gc threshold.
337 * In ipv4 we set it to the route hash table size * 8, which
338 * is half the size of the maximaum route cache for ipv4. It
339 * would be good to do the same thing for v6, except the table is
340 * constructed differently here. Here each table for a net namespace
341 * can have FIB_TABLE_HASHSZ entries, so lets go with the same
342 * computation that we used for ipv4 here. Also, lets keep the initial
343 * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults
344 * to that as a minimum as well
345 */
346 gc_thresh = FIB6_TABLE_HASHSZ * 8;
347 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
348
334 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, 349 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path,
335 xfrm6_policy_table); 350 xfrm6_policy_table);
336out: 351out: