diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2006-08-24 06:08:07 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-09-22 18:08:41 -0400 |
commit | f034b5d4efdfe0fb9e2a1ce1d95fa7914f24de49 (patch) | |
tree | e166f1e87606f7e53a78cac543284c3484481727 /net/xfrm | |
parent | 8f126e37c0b250310a48a609bedf92a19a5559ec (diff) |
[XFRM]: Dynamic xfrm_state hash table sizing.
The grow algorithm is simple, we grow if:
1) we see a hash chain collision at insert, and
2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and
3) the number of xfrm_state objects is > the current hash mask
All of this needs some tweaking.
Remove __initdata from "hashdist" so we can use it safely at run time.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/xfrm')
-rw-r--r-- | net/xfrm/xfrm_state.c | 247 |
1 files changed, 195 insertions, 52 deletions
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fe3c8c38d5e1..445263c54c94 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #include <linux/pfkeyv2.h> | 18 | #include <linux/pfkeyv2.h> |
19 | #include <linux/ipsec.h> | 19 | #include <linux/ipsec.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/bootmem.h> | ||
22 | #include <linux/vmalloc.h> | ||
23 | #include <linux/cache.h> | ||
21 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
22 | 25 | ||
23 | struct sock *xfrm_nl; | 26 | struct sock *xfrm_nl; |
@@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); | |||
38 | 41 | ||
39 | static DEFINE_SPINLOCK(xfrm_state_lock); | 42 | static DEFINE_SPINLOCK(xfrm_state_lock); |
40 | 43 | ||
41 | #define XFRM_DST_HSIZE 1024 | ||
42 | |||
43 | /* Hash table to find appropriate SA towards given target (endpoint | 44 | /* Hash table to find appropriate SA towards given target (endpoint |
44 | * of tunnel or destination of transport mode) allowed by selector. | 45 | * of tunnel or destination of transport mode) allowed by selector. |
45 | * | 46 | * |
46 | * Main use is finding SA after policy selected tunnel or transport mode. | 47 | * Main use is finding SA after policy selected tunnel or transport mode. |
47 | * Also, it can be used by ah/esp icmp error handler to find offending SA. | 48 | * Also, it can be used by ah/esp icmp error handler to find offending SA. |
48 | */ | 49 | */ |
49 | static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; | 50 | static struct hlist_head *xfrm_state_bydst __read_mostly; |
50 | static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; | 51 | static struct hlist_head *xfrm_state_bysrc __read_mostly; |
51 | static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; | 52 | static struct hlist_head *xfrm_state_byspi __read_mostly; |
52 | 53 | static unsigned int xfrm_state_hmask __read_mostly; | |
53 | static __inline__ | 54 | static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; |
54 | unsigned __xfrm4_dst_hash(xfrm_address_t *addr) | 55 | static unsigned int xfrm_state_num; |
56 | |||
57 | static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) | ||
55 | { | 58 | { |
56 | unsigned h; | 59 | unsigned int h; |
57 | h = ntohl(addr->a4); | 60 | h = ntohl(addr->a4); |
58 | h = (h ^ (h>>16)) % XFRM_DST_HSIZE; | 61 | h = (h ^ (h>>16)) & hmask; |
59 | return h; | 62 | return h; |
60 | } | 63 | } |
61 | 64 | ||
62 | static __inline__ | 65 | static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) |
63 | unsigned __xfrm6_dst_hash(xfrm_address_t *addr) | ||
64 | { | 66 | { |
65 | unsigned h; | 67 | unsigned int h; |
66 | h = ntohl(addr->a6[2]^addr->a6[3]); | 68 | h = ntohl(addr->a6[2]^addr->a6[3]); |
67 | h = (h ^ (h>>16)) % XFRM_DST_HSIZE; | 69 | h = (h ^ (h>>16)) & hmask; |
68 | return h; | 70 | return h; |
69 | } | 71 | } |
70 | 72 | ||
71 | static __inline__ | 73 | static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) |
72 | unsigned __xfrm4_src_hash(xfrm_address_t *addr) | ||
73 | { | 74 | { |
74 | return __xfrm4_dst_hash(addr); | 75 | return __xfrm4_dst_hash(addr, hmask); |
75 | } | 76 | } |
76 | 77 | ||
77 | static __inline__ | 78 | static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) |
78 | unsigned __xfrm6_src_hash(xfrm_address_t *addr) | ||
79 | { | 79 | { |
80 | return __xfrm6_dst_hash(addr); | 80 | return __xfrm6_dst_hash(addr, hmask); |
81 | } | 81 | } |
82 | 82 | ||
83 | static __inline__ | 83 | static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) |
84 | unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) | ||
85 | { | 84 | { |
86 | switch (family) { | 85 | switch (family) { |
87 | case AF_INET: | 86 | case AF_INET: |
88 | return __xfrm4_src_hash(addr); | 87 | return __xfrm4_src_hash(addr, hmask); |
89 | case AF_INET6: | 88 | case AF_INET6: |
90 | return __xfrm6_src_hash(addr); | 89 | return __xfrm6_src_hash(addr, hmask); |
91 | } | 90 | } |
92 | return 0; | 91 | return 0; |
93 | } | 92 | } |
94 | 93 | ||
95 | static __inline__ | 94 | static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) |
96 | unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) | 95 | { |
96 | return __xfrm_src_hash(addr, family, xfrm_state_hmask); | ||
97 | } | ||
98 | |||
99 | static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) | ||
97 | { | 100 | { |
98 | switch (family) { | 101 | switch (family) { |
99 | case AF_INET: | 102 | case AF_INET: |
100 | return __xfrm4_dst_hash(addr); | 103 | return __xfrm4_dst_hash(addr, hmask); |
101 | case AF_INET6: | 104 | case AF_INET6: |
102 | return __xfrm6_dst_hash(addr); | 105 | return __xfrm6_dst_hash(addr, hmask); |
103 | } | 106 | } |
104 | return 0; | 107 | return 0; |
105 | } | 108 | } |
106 | 109 | ||
107 | static __inline__ | 110 | static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) |
108 | unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) | 111 | { |
112 | return __xfrm_dst_hash(addr, family, xfrm_state_hmask); | ||
113 | } | ||
114 | |||
115 | static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, | ||
116 | unsigned int hmask) | ||
109 | { | 117 | { |
110 | unsigned h; | 118 | unsigned int h; |
111 | h = ntohl(addr->a4^spi^proto); | 119 | h = ntohl(addr->a4^spi^proto); |
112 | h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; | 120 | h = (h ^ (h>>10) ^ (h>>20)) & hmask; |
113 | return h; | 121 | return h; |
114 | } | 122 | } |
115 | 123 | ||
116 | static __inline__ | 124 | static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, |
117 | unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) | 125 | unsigned int hmask) |
118 | { | 126 | { |
119 | unsigned h; | 127 | unsigned int h; |
120 | h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); | 128 | h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); |
121 | h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; | 129 | h = (h ^ (h>>10) ^ (h>>20)) & hmask; |
122 | return h; | 130 | return h; |
123 | } | 131 | } |
124 | 132 | ||
125 | static __inline__ | 133 | static inline |
126 | unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) | 134 | unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, |
135 | unsigned int hmask) | ||
127 | { | 136 | { |
128 | switch (family) { | 137 | switch (family) { |
129 | case AF_INET: | 138 | case AF_INET: |
130 | return __xfrm4_spi_hash(addr, spi, proto); | 139 | return __xfrm4_spi_hash(addr, spi, proto, hmask); |
131 | case AF_INET6: | 140 | case AF_INET6: |
132 | return __xfrm6_spi_hash(addr, spi, proto); | 141 | return __xfrm6_spi_hash(addr, spi, proto, hmask); |
133 | } | 142 | } |
134 | return 0; /*XXX*/ | 143 | return 0; /*XXX*/ |
135 | } | 144 | } |
136 | 145 | ||
146 | static inline unsigned int | ||
147 | xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) | ||
148 | { | ||
149 | return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); | ||
150 | } | ||
151 | |||
152 | static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) | ||
153 | { | ||
154 | struct hlist_head *n; | ||
155 | |||
156 | if (sz <= PAGE_SIZE) | ||
157 | n = kmalloc(sz, GFP_KERNEL); | ||
158 | else if (hashdist) | ||
159 | n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); | ||
160 | else | ||
161 | n = (struct hlist_head *) | ||
162 | __get_free_pages(GFP_KERNEL, get_order(sz)); | ||
163 | |||
164 | if (n) | ||
165 | memset(n, 0, sz); | ||
166 | |||
167 | return n; | ||
168 | } | ||
169 | |||
170 | static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) | ||
171 | { | ||
172 | if (sz <= PAGE_SIZE) | ||
173 | kfree(n); | ||
174 | else if (hashdist) | ||
175 | vfree(n); | ||
176 | else | ||
177 | free_pages((unsigned long)n, get_order(sz)); | ||
178 | } | ||
179 | |||
180 | static void xfrm_hash_transfer(struct hlist_head *list, | ||
181 | struct hlist_head *ndsttable, | ||
182 | struct hlist_head *nsrctable, | ||
183 | struct hlist_head *nspitable, | ||
184 | unsigned int nhashmask) | ||
185 | { | ||
186 | struct hlist_node *entry, *tmp; | ||
187 | struct xfrm_state *x; | ||
188 | |||
189 | hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { | ||
190 | unsigned int h; | ||
191 | |||
192 | h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); | ||
193 | hlist_add_head(&x->bydst, ndsttable+h); | ||
194 | |||
195 | h = __xfrm_src_hash(&x->props.saddr, x->props.family, | ||
196 | nhashmask); | ||
197 | hlist_add_head(&x->bysrc, nsrctable+h); | ||
198 | |||
199 | h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, | ||
200 | x->props.family, nhashmask); | ||
201 | hlist_add_head(&x->byspi, nspitable+h); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | static unsigned long xfrm_hash_new_size(void) | ||
206 | { | ||
207 | return ((xfrm_state_hmask + 1) << 1) * | ||
208 | sizeof(struct hlist_head); | ||
209 | } | ||
210 | |||
211 | static DEFINE_MUTEX(hash_resize_mutex); | ||
212 | |||
213 | static void xfrm_hash_resize(void *__unused) | ||
214 | { | ||
215 | struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; | ||
216 | unsigned long nsize, osize; | ||
217 | unsigned int nhashmask, ohashmask; | ||
218 | int i; | ||
219 | |||
220 | mutex_lock(&hash_resize_mutex); | ||
221 | |||
222 | nsize = xfrm_hash_new_size(); | ||
223 | ndst = xfrm_state_hash_alloc(nsize); | ||
224 | if (!ndst) | ||
225 | goto out_unlock; | ||
226 | nsrc = xfrm_state_hash_alloc(nsize); | ||
227 | if (!nsrc) { | ||
228 | xfrm_state_hash_free(ndst, nsize); | ||
229 | goto out_unlock; | ||
230 | } | ||
231 | nspi = xfrm_state_hash_alloc(nsize); | ||
232 | if (!nspi) { | ||
233 | xfrm_state_hash_free(ndst, nsize); | ||
234 | xfrm_state_hash_free(nsrc, nsize); | ||
235 | goto out_unlock; | ||
236 | } | ||
237 | |||
238 | spin_lock_bh(&xfrm_state_lock); | ||
239 | |||
240 | nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; | ||
241 | for (i = xfrm_state_hmask; i >= 0; i--) | ||
242 | xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, | ||
243 | nhashmask); | ||
244 | |||
245 | odst = xfrm_state_bydst; | ||
246 | osrc = xfrm_state_bysrc; | ||
247 | ospi = xfrm_state_byspi; | ||
248 | ohashmask = xfrm_state_hmask; | ||
249 | |||
250 | xfrm_state_bydst = ndst; | ||
251 | xfrm_state_bysrc = nsrc; | ||
252 | xfrm_state_byspi = nspi; | ||
253 | xfrm_state_hmask = nhashmask; | ||
254 | |||
255 | spin_unlock_bh(&xfrm_state_lock); | ||
256 | |||
257 | osize = (ohashmask + 1) * sizeof(struct hlist_head); | ||
258 | xfrm_state_hash_free(odst, osize); | ||
259 | xfrm_state_hash_free(osrc, osize); | ||
260 | xfrm_state_hash_free(ospi, osize); | ||
261 | |||
262 | out_unlock: | ||
263 | mutex_unlock(&hash_resize_mutex); | ||
264 | } | ||
265 | |||
266 | static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); | ||
267 | |||
137 | DECLARE_WAIT_QUEUE_HEAD(km_waitq); | 268 | DECLARE_WAIT_QUEUE_HEAD(km_waitq); |
138 | EXPORT_SYMBOL(km_waitq); | 269 | EXPORT_SYMBOL(km_waitq); |
139 | 270 | ||
@@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x) | |||
335 | hlist_del(&x->byspi); | 466 | hlist_del(&x->byspi); |
336 | __xfrm_state_put(x); | 467 | __xfrm_state_put(x); |
337 | } | 468 | } |
469 | xfrm_state_num--; | ||
338 | spin_unlock(&xfrm_state_lock); | 470 | spin_unlock(&xfrm_state_lock); |
339 | if (del_timer(&x->timer)) | 471 | if (del_timer(&x->timer)) |
340 | __xfrm_state_put(x); | 472 | __xfrm_state_put(x); |
@@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto) | |||
380 | int i; | 512 | int i; |
381 | 513 | ||
382 | spin_lock_bh(&xfrm_state_lock); | 514 | spin_lock_bh(&xfrm_state_lock); |
383 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 515 | for (i = 0; i < xfrm_state_hmask; i++) { |
384 | struct hlist_node *entry; | 516 | struct hlist_node *entry; |
385 | struct xfrm_state *x; | 517 | struct xfrm_state *x; |
386 | restart: | 518 | restart: |
@@ -611,7 +743,7 @@ out: | |||
611 | 743 | ||
612 | static void __xfrm_state_insert(struct xfrm_state *x) | 744 | static void __xfrm_state_insert(struct xfrm_state *x) |
613 | { | 745 | { |
614 | unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); | 746 | unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); |
615 | 747 | ||
616 | hlist_add_head(&x->bydst, xfrm_state_bydst+h); | 748 | hlist_add_head(&x->bydst, xfrm_state_bydst+h); |
617 | xfrm_state_hold(x); | 749 | xfrm_state_hold(x); |
@@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x) | |||
637 | xfrm_state_hold(x); | 769 | xfrm_state_hold(x); |
638 | 770 | ||
639 | wake_up(&km_waitq); | 771 | wake_up(&km_waitq); |
772 | |||
773 | xfrm_state_num++; | ||
774 | |||
775 | if (x->bydst.next != NULL && | ||
776 | (xfrm_state_hmask + 1) < xfrm_state_hashmax && | ||
777 | xfrm_state_num > xfrm_state_hmask) | ||
778 | schedule_work(&xfrm_hash_work); | ||
640 | } | 779 | } |
641 | 780 | ||
642 | void xfrm_state_insert(struct xfrm_state *x) | 781 | void xfrm_state_insert(struct xfrm_state *x) |
@@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) | |||
984 | { | 1123 | { |
985 | int i; | 1124 | int i; |
986 | 1125 | ||
987 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 1126 | for (i = 0; i <= xfrm_state_hmask; i++) { |
988 | struct hlist_node *entry; | 1127 | struct hlist_node *entry; |
989 | struct xfrm_state *x; | 1128 | struct xfrm_state *x; |
990 | 1129 | ||
@@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); | |||
1026 | void | 1165 | void |
1027 | xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) | 1166 | xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) |
1028 | { | 1167 | { |
1029 | u32 h; | 1168 | unsigned int h; |
1030 | struct xfrm_state *x0; | 1169 | struct xfrm_state *x0; |
1031 | 1170 | ||
1032 | if (x->id.spi) | 1171 | if (x->id.spi) |
@@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), | |||
1074 | int err = 0; | 1213 | int err = 0; |
1075 | 1214 | ||
1076 | spin_lock_bh(&xfrm_state_lock); | 1215 | spin_lock_bh(&xfrm_state_lock); |
1077 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 1216 | for (i = 0; i <= xfrm_state_hmask; i++) { |
1078 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { | 1217 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { |
1079 | if (xfrm_id_proto_match(x->id.proto, proto)) | 1218 | if (xfrm_id_proto_match(x->id.proto, proto)) |
1080 | count++; | 1219 | count++; |
@@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), | |||
1085 | goto out; | 1224 | goto out; |
1086 | } | 1225 | } |
1087 | 1226 | ||
1088 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 1227 | for (i = 0; i <= xfrm_state_hmask; i++) { |
1089 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { | 1228 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { |
1090 | if (!xfrm_id_proto_match(x->id.proto, proto)) | 1229 | if (!xfrm_id_proto_match(x->id.proto, proto)) |
1091 | continue; | 1230 | continue; |
@@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state); | |||
1531 | 1670 | ||
1532 | void __init xfrm_state_init(void) | 1671 | void __init xfrm_state_init(void) |
1533 | { | 1672 | { |
1534 | int i; | 1673 | unsigned int sz; |
1674 | |||
1675 | sz = sizeof(struct hlist_head) * 8; | ||
1676 | |||
1677 | xfrm_state_bydst = xfrm_state_hash_alloc(sz); | ||
1678 | xfrm_state_bysrc = xfrm_state_hash_alloc(sz); | ||
1679 | xfrm_state_byspi = xfrm_state_hash_alloc(sz); | ||
1680 | if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) | ||
1681 | panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); | ||
1682 | xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); | ||
1535 | 1683 | ||
1536 | for (i=0; i<XFRM_DST_HSIZE; i++) { | ||
1537 | INIT_HLIST_HEAD(&xfrm_state_bydst[i]); | ||
1538 | INIT_HLIST_HEAD(&xfrm_state_bysrc[i]); | ||
1539 | INIT_HLIST_HEAD(&xfrm_state_byspi[i]); | ||
1540 | } | ||
1541 | INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); | 1684 | INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); |
1542 | } | 1685 | } |
1543 | 1686 | ||