aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2006-08-24 06:08:07 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2006-09-22 18:08:41 -0400
commitf034b5d4efdfe0fb9e2a1ce1d95fa7914f24de49 (patch)
treee166f1e87606f7e53a78cac543284c3484481727
parent8f126e37c0b250310a48a609bedf92a19a5559ec (diff)
[XFRM]: Dynamic xfrm_state hash table sizing.
The grow algorithm is simple, we grow if: 1) we see a hash chain collision at insert, and 2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and 3) the number of xfrm_state objects is > the current hash mask All of this needs some tweaking. Remove __initdata from "hashdist" so we can use it safely at run time. Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bootmem.h2
-rw-r--r--mm/page_alloc.c2
-rw-r--r--net/xfrm/xfrm_state.c247
3 files changed, 197 insertions, 54 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1021f508d82c..e319c649e4fd 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename,
114#else 114#else
115#define HASHDIST_DEFAULT 0 115#define HASHDIST_DEFAULT 0
116#endif 116#endif
117extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ 117extern int hashdist; /* Distribute hashes across NUMA nodes? */
118 118
119 119
120#endif /* _LINUX_BOOTMEM_H */ 120#endif /* _LINUX_BOOTMEM_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 54a4f5375bba..3b5358a0561f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2363,7 +2363,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
2363 return 0; 2363 return 0;
2364} 2364}
2365 2365
2366__initdata int hashdist = HASHDIST_DEFAULT; 2366int hashdist = HASHDIST_DEFAULT;
2367 2367
2368#ifdef CONFIG_NUMA 2368#ifdef CONFIG_NUMA
2369static int __init set_hashdist(char *str) 2369static int __init set_hashdist(char *str)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fe3c8c38d5e1..445263c54c94 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -18,6 +18,9 @@
18#include <linux/pfkeyv2.h> 18#include <linux/pfkeyv2.h>
19#include <linux/ipsec.h> 19#include <linux/ipsec.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/bootmem.h>
22#include <linux/vmalloc.h>
23#include <linux/cache.h>
21#include <asm/uaccess.h> 24#include <asm/uaccess.h>
22 25
23struct sock *xfrm_nl; 26struct sock *xfrm_nl;
@@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
38 41
39static DEFINE_SPINLOCK(xfrm_state_lock); 42static DEFINE_SPINLOCK(xfrm_state_lock);
40 43
41#define XFRM_DST_HSIZE 1024
42
43/* Hash table to find appropriate SA towards given target (endpoint 44/* Hash table to find appropriate SA towards given target (endpoint
44 * of tunnel or destination of transport mode) allowed by selector. 45 * of tunnel or destination of transport mode) allowed by selector.
45 * 46 *
46 * Main use is finding SA after policy selected tunnel or transport mode. 47 * Main use is finding SA after policy selected tunnel or transport mode.
47 * Also, it can be used by ah/esp icmp error handler to find offending SA. 48 * Also, it can be used by ah/esp icmp error handler to find offending SA.
48 */ 49 */
49static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; 50static struct hlist_head *xfrm_state_bydst __read_mostly;
50static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; 51static struct hlist_head *xfrm_state_bysrc __read_mostly;
51static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; 52static struct hlist_head *xfrm_state_byspi __read_mostly;
52 53static unsigned int xfrm_state_hmask __read_mostly;
53static __inline__ 54static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
54unsigned __xfrm4_dst_hash(xfrm_address_t *addr) 55static unsigned int xfrm_state_num;
56
57static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
55{ 58{
56 unsigned h; 59 unsigned int h;
57 h = ntohl(addr->a4); 60 h = ntohl(addr->a4);
58 h = (h ^ (h>>16)) % XFRM_DST_HSIZE; 61 h = (h ^ (h>>16)) & hmask;
59 return h; 62 return h;
60} 63}
61 64
62static __inline__ 65static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
63unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
64{ 66{
65 unsigned h; 67 unsigned int h;
66 h = ntohl(addr->a6[2]^addr->a6[3]); 68 h = ntohl(addr->a6[2]^addr->a6[3]);
67 h = (h ^ (h>>16)) % XFRM_DST_HSIZE; 69 h = (h ^ (h>>16)) & hmask;
68 return h; 70 return h;
69} 71}
70 72
71static __inline__ 73static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
72unsigned __xfrm4_src_hash(xfrm_address_t *addr)
73{ 74{
74 return __xfrm4_dst_hash(addr); 75 return __xfrm4_dst_hash(addr, hmask);
75} 76}
76 77
77static __inline__ 78static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
78unsigned __xfrm6_src_hash(xfrm_address_t *addr)
79{ 79{
80 return __xfrm6_dst_hash(addr); 80 return __xfrm6_dst_hash(addr, hmask);
81} 81}
82 82
83static __inline__ 83static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
84unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
85{ 84{
86 switch (family) { 85 switch (family) {
87 case AF_INET: 86 case AF_INET:
88 return __xfrm4_src_hash(addr); 87 return __xfrm4_src_hash(addr, hmask);
89 case AF_INET6: 88 case AF_INET6:
90 return __xfrm6_src_hash(addr); 89 return __xfrm6_src_hash(addr, hmask);
91 } 90 }
92 return 0; 91 return 0;
93} 92}
94 93
95static __inline__ 94static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
96unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) 95{
96 return __xfrm_src_hash(addr, family, xfrm_state_hmask);
97}
98
99static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
97{ 100{
98 switch (family) { 101 switch (family) {
99 case AF_INET: 102 case AF_INET:
100 return __xfrm4_dst_hash(addr); 103 return __xfrm4_dst_hash(addr, hmask);
101 case AF_INET6: 104 case AF_INET6:
102 return __xfrm6_dst_hash(addr); 105 return __xfrm6_dst_hash(addr, hmask);
103 } 106 }
104 return 0; 107 return 0;
105} 108}
106 109
107static __inline__ 110static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
108unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) 111{
112 return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
113}
114
115static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
116 unsigned int hmask)
109{ 117{
110 unsigned h; 118 unsigned int h;
111 h = ntohl(addr->a4^spi^proto); 119 h = ntohl(addr->a4^spi^proto);
112 h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; 120 h = (h ^ (h>>10) ^ (h>>20)) & hmask;
113 return h; 121 return h;
114} 122}
115 123
116static __inline__ 124static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
117unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) 125 unsigned int hmask)
118{ 126{
119 unsigned h; 127 unsigned int h;
120 h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); 128 h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
121 h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; 129 h = (h ^ (h>>10) ^ (h>>20)) & hmask;
122 return h; 130 return h;
123} 131}
124 132
125static __inline__ 133static inline
126unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) 134unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
135 unsigned int hmask)
127{ 136{
128 switch (family) { 137 switch (family) {
129 case AF_INET: 138 case AF_INET:
130 return __xfrm4_spi_hash(addr, spi, proto); 139 return __xfrm4_spi_hash(addr, spi, proto, hmask);
131 case AF_INET6: 140 case AF_INET6:
132 return __xfrm6_spi_hash(addr, spi, proto); 141 return __xfrm6_spi_hash(addr, spi, proto, hmask);
133 } 142 }
134 return 0; /*XXX*/ 143 return 0; /*XXX*/
135} 144}
136 145
146static inline unsigned int
147xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
148{
149 return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
150}
151
152static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
153{
154 struct hlist_head *n;
155
156 if (sz <= PAGE_SIZE)
157 n = kmalloc(sz, GFP_KERNEL);
158 else if (hashdist)
159 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
160 else
161 n = (struct hlist_head *)
162 __get_free_pages(GFP_KERNEL, get_order(sz));
163
164 if (n)
165 memset(n, 0, sz);
166
167 return n;
168}
169
170static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
171{
172 if (sz <= PAGE_SIZE)
173 kfree(n);
174 else if (hashdist)
175 vfree(n);
176 else
177 free_pages((unsigned long)n, get_order(sz));
178}
179
180static void xfrm_hash_transfer(struct hlist_head *list,
181 struct hlist_head *ndsttable,
182 struct hlist_head *nsrctable,
183 struct hlist_head *nspitable,
184 unsigned int nhashmask)
185{
186 struct hlist_node *entry, *tmp;
187 struct xfrm_state *x;
188
189 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
190 unsigned int h;
191
192 h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
193 hlist_add_head(&x->bydst, ndsttable+h);
194
195 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
196 nhashmask);
197 hlist_add_head(&x->bysrc, nsrctable+h);
198
199 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
200 x->props.family, nhashmask);
201 hlist_add_head(&x->byspi, nspitable+h);
202 }
203}
204
205static unsigned long xfrm_hash_new_size(void)
206{
207 return ((xfrm_state_hmask + 1) << 1) *
208 sizeof(struct hlist_head);
209}
210
211static DEFINE_MUTEX(hash_resize_mutex);
212
213static void xfrm_hash_resize(void *__unused)
214{
215 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
216 unsigned long nsize, osize;
217 unsigned int nhashmask, ohashmask;
218 int i;
219
220 mutex_lock(&hash_resize_mutex);
221
222 nsize = xfrm_hash_new_size();
223 ndst = xfrm_state_hash_alloc(nsize);
224 if (!ndst)
225 goto out_unlock;
226 nsrc = xfrm_state_hash_alloc(nsize);
227 if (!nsrc) {
228 xfrm_state_hash_free(ndst, nsize);
229 goto out_unlock;
230 }
231 nspi = xfrm_state_hash_alloc(nsize);
232 if (!nspi) {
233 xfrm_state_hash_free(ndst, nsize);
234 xfrm_state_hash_free(nsrc, nsize);
235 goto out_unlock;
236 }
237
238 spin_lock_bh(&xfrm_state_lock);
239
240 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
241 for (i = xfrm_state_hmask; i >= 0; i--)
242 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
243 nhashmask);
244
245 odst = xfrm_state_bydst;
246 osrc = xfrm_state_bysrc;
247 ospi = xfrm_state_byspi;
248 ohashmask = xfrm_state_hmask;
249
250 xfrm_state_bydst = ndst;
251 xfrm_state_bysrc = nsrc;
252 xfrm_state_byspi = nspi;
253 xfrm_state_hmask = nhashmask;
254
255 spin_unlock_bh(&xfrm_state_lock);
256
257 osize = (ohashmask + 1) * sizeof(struct hlist_head);
258 xfrm_state_hash_free(odst, osize);
259 xfrm_state_hash_free(osrc, osize);
260 xfrm_state_hash_free(ospi, osize);
261
262out_unlock:
263 mutex_unlock(&hash_resize_mutex);
264}
265
266static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
267
137DECLARE_WAIT_QUEUE_HEAD(km_waitq); 268DECLARE_WAIT_QUEUE_HEAD(km_waitq);
138EXPORT_SYMBOL(km_waitq); 269EXPORT_SYMBOL(km_waitq);
139 270
@@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
335 hlist_del(&x->byspi); 466 hlist_del(&x->byspi);
336 __xfrm_state_put(x); 467 __xfrm_state_put(x);
337 } 468 }
469 xfrm_state_num--;
338 spin_unlock(&xfrm_state_lock); 470 spin_unlock(&xfrm_state_lock);
339 if (del_timer(&x->timer)) 471 if (del_timer(&x->timer))
340 __xfrm_state_put(x); 472 __xfrm_state_put(x);
@@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto)
380 int i; 512 int i;
381 513
382 spin_lock_bh(&xfrm_state_lock); 514 spin_lock_bh(&xfrm_state_lock);
383 for (i = 0; i < XFRM_DST_HSIZE; i++) { 515 for (i = 0; i < xfrm_state_hmask; i++) {
384 struct hlist_node *entry; 516 struct hlist_node *entry;
385 struct xfrm_state *x; 517 struct xfrm_state *x;
386restart: 518restart:
@@ -611,7 +743,7 @@ out:
611 743
612static void __xfrm_state_insert(struct xfrm_state *x) 744static void __xfrm_state_insert(struct xfrm_state *x)
613{ 745{
614 unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); 746 unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
615 747
616 hlist_add_head(&x->bydst, xfrm_state_bydst+h); 748 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
617 xfrm_state_hold(x); 749 xfrm_state_hold(x);
@@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x)
637 xfrm_state_hold(x); 769 xfrm_state_hold(x);
638 770
639 wake_up(&km_waitq); 771 wake_up(&km_waitq);
772
773 xfrm_state_num++;
774
775 if (x->bydst.next != NULL &&
776 (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
777 xfrm_state_num > xfrm_state_hmask)
778 schedule_work(&xfrm_hash_work);
640} 779}
641 780
642void xfrm_state_insert(struct xfrm_state *x) 781void xfrm_state_insert(struct xfrm_state *x)
@@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
984{ 1123{
985 int i; 1124 int i;
986 1125
987 for (i = 0; i < XFRM_DST_HSIZE; i++) { 1126 for (i = 0; i <= xfrm_state_hmask; i++) {
988 struct hlist_node *entry; 1127 struct hlist_node *entry;
989 struct xfrm_state *x; 1128 struct xfrm_state *x;
990 1129
@@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq);
1026void 1165void
1027xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) 1166xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1028{ 1167{
1029 u32 h; 1168 unsigned int h;
1030 struct xfrm_state *x0; 1169 struct xfrm_state *x0;
1031 1170
1032 if (x->id.spi) 1171 if (x->id.spi)
@@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1074 int err = 0; 1213 int err = 0;
1075 1214
1076 spin_lock_bh(&xfrm_state_lock); 1215 spin_lock_bh(&xfrm_state_lock);
1077 for (i = 0; i < XFRM_DST_HSIZE; i++) { 1216 for (i = 0; i <= xfrm_state_hmask; i++) {
1078 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { 1217 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1079 if (xfrm_id_proto_match(x->id.proto, proto)) 1218 if (xfrm_id_proto_match(x->id.proto, proto))
1080 count++; 1219 count++;
@@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1085 goto out; 1224 goto out;
1086 } 1225 }
1087 1226
1088 for (i = 0; i < XFRM_DST_HSIZE; i++) { 1227 for (i = 0; i <= xfrm_state_hmask; i++) {
1089 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { 1228 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1090 if (!xfrm_id_proto_match(x->id.proto, proto)) 1229 if (!xfrm_id_proto_match(x->id.proto, proto))
1091 continue; 1230 continue;
@@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state);
1531 1670
1532void __init xfrm_state_init(void) 1671void __init xfrm_state_init(void)
1533{ 1672{
1534 int i; 1673 unsigned int sz;
1674
1675 sz = sizeof(struct hlist_head) * 8;
1676
1677 xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1678 xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1679 xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1680 if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1681 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1682 xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1535 1683
1536 for (i=0; i<XFRM_DST_HSIZE; i++) {
1537 INIT_HLIST_HEAD(&xfrm_state_bydst[i]);
1538 INIT_HLIST_HEAD(&xfrm_state_bysrc[i]);
1539 INIT_HLIST_HEAD(&xfrm_state_byspi[i]);
1540 }
1541 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); 1684 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1542} 1685}
1543 1686