author    Timo Teräs <timo.teras@iki.fi>  2010-04-06 20:30:04 -0400
committer David S. Miller <davem@davemloft.net>  2010-04-07 06:43:18 -0400
commit    fe1a5f031e76bd8761a7803d75b95ee96e84a574
tree      c74392cef02c1529b00df6c5d0b8f4239fe091c3
parent    8020eb82d4c37d21dade0abeb8feed265a01819e
flow: virtualize flow cache entry methods
This allows the cached object to be validated before it is returned, and to be
destroyed properly if the last reference was held in the flow cache. This is
also a preparation for caching bundles in the flow cache.

In return for virtualizing the methods, we save on:
- not having to regenerate the whole flow cache on policy removal:
  each flow matching a killed policy gets refreshed lazily, as the
  getter function notices that the policy is dead
- not having to call flow_cache_flush from policy gc, since the flow
  cache now properly deletes the object if it held any references

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/net/flow.h      |  23
-rw-r--r--  include/net/xfrm.h      |   2
-rw-r--r--  net/core/flow.c         | 128
-rw-r--r--  net/xfrm/xfrm_policy.c  | 112
4 files changed, 163 insertions(+), 102 deletions(-)
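
Before reading the diff, the ownership model the commit message describes can be made concrete. The following is a minimal, standalone C sketch, not kernel code: struct policy, policy_ops and the plain-int refcounting are invented for illustration. It shows how an object embeds a flow_cache_object, how the get/check/delete virtual methods recover the containing object with container_of(), and how the cache can safely drop the last reference itself.

/* Standalone illustration only -- not part of the patch. */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct flow_cache_ops;

struct flow_cache_object {
	const struct flow_cache_ops *ops;
};

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *);
	int (*check)(struct flow_cache_object *);
	void (*delete)(struct flow_cache_object *);
};

/* A hypothetical cacheable object, in the role xfrm_policy plays below. */
struct policy {
	int refcnt;			/* stands in for atomic_t refcnt */
	int dead;			/* stands in for pol->walk.dead */
	struct flow_cache_object flo;
};

static struct flow_cache_object *policy_get(struct flow_cache_object *flo)
{
	struct policy *pol = container_of(flo, struct policy, flo);

	if (pol->dead)
		return NULL;		/* stale: lookup falls back to the resolver */
	pol->refcnt++;			/* hand the caller its own reference */
	return flo;
}

static int policy_check(struct flow_cache_object *flo)
{
	return !container_of(flo, struct policy, flo)->dead;
}

static void policy_delete(struct flow_cache_object *flo)
{
	struct policy *pol = container_of(flo, struct policy, flo);

	if (--pol->refcnt == 0)
		free(pol);		/* the cache may hold the last reference */
}

static const struct flow_cache_ops policy_ops = {
	.get	= policy_get,
	.check	= policy_check,
	.delete	= policy_delete,
};

int main(void)
{
	struct policy *pol = calloc(1, sizeof(*pol));

	if (!pol)
		return 1;
	pol->refcnt = 1;		/* the reference owned by the cache entry */
	pol->flo.ops = &policy_ops;

	struct flow_cache_object *hit = pol->flo.ops->get(&pol->flo);
	printf("cache hit is %s\n", hit ? "valid" : "stale");
	if (hit)
		hit->ops->delete(hit);		/* caller drops its reference */
	pol->flo.ops->delete(&pol->flo);	/* eviction drops the cache's */
	return 0;
}

The same pattern appears in the patch itself: xfrm_policy gains an embedded flow_cache_object, and xfrm_policy_fc_ops supplies the three methods.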
diff --git a/include/net/flow.h b/include/net/flow.h
index 809970b7dfee..bb08692a20b0 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -86,11 +86,26 @@ struct flowi {
 
 struct net;
 struct sock;
-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
-			      u8 dir, void **objp, atomic_t **obj_refp);
+struct flow_cache_ops;
+
+struct flow_cache_object {
+	const struct flow_cache_ops *ops;
+};
+
+struct flow_cache_ops {
+	struct flow_cache_object *(*get)(struct flow_cache_object *);
+	int (*check)(struct flow_cache_object *);
+	void (*delete)(struct flow_cache_object *);
+};
+
+typedef struct flow_cache_object *(*flow_resolve_t)(
+		struct net *net, struct flowi *key, u16 family,
+		u8 dir, struct flow_cache_object *oldobj, void *ctx);
+
+extern struct flow_cache_object *flow_cache_lookup(
+		struct net *net, struct flowi *key, u16 family,
+		u8 dir, flow_resolve_t resolver, void *ctx);
 
-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
-			       u8 dir, flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
 
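
For reference, the resolver contract implied by these declarations, as a hedged skeleton (my_obj, my_obj_find, my_obj_hold and my_obj_put are hypothetical helpers; the real implementation is xfrm_policy_lookup() later in this patch): flow_cache_lookup() transfers ownership of any stale cached object in through oldobj, and a successful resolver returns an object carrying two references, one kept by the cache and one for the caller.

static struct flow_cache_object *example_resolver(
		struct net *net, struct flowi *key, u16 family,
		u8 dir, struct flow_cache_object *oldobj, void *ctx)
{
	struct my_obj *obj;

	/* The stale object arrives with the cache's reference;
	 * the resolver is responsible for releasing it. */
	if (oldobj)
		my_obj_put(container_of(oldobj, struct my_obj, flo));

	obj = my_obj_find(net, key, family, dir);	/* returns one ref */
	if (IS_ERR(obj))
		return ERR_CAST(obj);	/* not cached: flow.c backdates fle->genid */
	if (!obj)
		return NULL;

	my_obj_hold(obj);	/* second ref: one for cache, one for caller */
	return &obj->flo;
}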
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d74e080ba6c9..35396e2dd1dc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -19,6 +19,7 @@
 #include <net/route.h>
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
+#include <net/flow.h>
 
 #include <linux/interrupt.h>
 
@@ -481,6 +482,7 @@ struct xfrm_policy {
 	atomic_t		refcnt;
 	struct timer_list	timer;
 
+	struct flow_cache_object flo;
 	u32			priority;
 	u32			index;
 	struct xfrm_mark	mark;
diff --git a/net/core/flow.c b/net/core/flow.c
index 1d27ca6b421d..521df52a77d2 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,17 +26,16 @@
 #include <linux/security.h>
 
 struct flow_cache_entry {
 	struct flow_cache_entry	*next;
 	u16			family;
 	u8			dir;
 	u32			genid;
 	struct flowi		key;
-	void			*object;
-	atomic_t		*object_ref;
+	struct flow_cache_object *object;
 };
 
 struct flow_cache_percpu {
-	struct flow_cache_entry ** hash_table;
+	struct flow_cache_entry	**hash_table;
 	int			hash_count;
 	u32			hash_rnd;
 	int			hash_rnd_recalc;
@@ -44,7 +43,7 @@ struct flow_cache_percpu {
 };
 
 struct flow_flush_info {
-	struct flow_cache *	cache;
+	struct flow_cache	*cache;
 	atomic_t		cpuleft;
 	struct completion	completion;
 };
@@ -52,7 +51,7 @@ struct flow_flush_info {
 struct flow_cache {
 	u32			hash_shift;
 	unsigned long		order;
-	struct flow_cache_percpu *	percpu;
+	struct flow_cache_percpu *percpu;
 	struct notifier_block	hotcpu_notifier;
 	int			low_watermark;
 	int			high_watermark;
@@ -78,12 +77,21 @@ static void flow_cache_new_hashrnd(unsigned long arg)
 	add_timer(&fc->rnd_timer);
 }
 
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+	if (atomic_read(&flow_cache_genid) != fle->genid)
+		return 0;
+	if (fle->object && !fle->object->ops->check(fle->object))
+		return 0;
+	return 1;
+}
+
 static void flow_entry_kill(struct flow_cache *fc,
 			    struct flow_cache_percpu *fcp,
 			    struct flow_cache_entry *fle)
 {
 	if (fle->object)
-		atomic_dec(fle->object_ref);
+		fle->object->ops->delete(fle->object);
 	kmem_cache_free(flow_cachep, fle);
 	fcp->hash_count--;
 }
@@ -96,16 +104,18 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 	int i;
 
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		int k = 0;
+		int saved = 0;
 
 		flp = &fcp->hash_table[i];
-		while ((fle = *flp) != NULL && k < shrink_to) {
-			k++;
-			flp = &fle->next;
-		}
 		while ((fle = *flp) != NULL) {
-			*flp = fle->next;
-			flow_entry_kill(fc, fcp, fle);
+			if (saved < shrink_to &&
+			    flow_entry_valid(fle)) {
+				saved++;
+				flp = &fle->next;
+			} else {
+				*flp = fle->next;
+				flow_entry_kill(fc, fcp, fle);
+			}
 		}
 	}
 }
@@ -166,18 +176,21 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
-			flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+		  flow_resolve_t resolver, void *ctx)
 {
 	struct flow_cache *fc = &flow_cache_global;
 	struct flow_cache_percpu *fcp;
 	struct flow_cache_entry *fle, **head;
+	struct flow_cache_object *flo;
 	unsigned int hash;
 
 	local_bh_disable();
 	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
 
 	fle = NULL;
+	flo = NULL;
 	/* Packet really early in init?  Making flow_cache_init a
 	 * pre-smp initcall would solve this.  --RR */
 	if (!fcp->hash_table)
@@ -185,27 +198,17 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 
 	if (fcp->hash_rnd_recalc)
 		flow_new_hash_rnd(fc, fcp);
-	hash = flow_hash_code(fc, fcp, key);
 
+	hash = flow_hash_code(fc, fcp, key);
 	head = &fcp->hash_table[hash];
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
-		    flow_key_compare(key, &fle->key) == 0) {
-			if (fle->genid == atomic_read(&flow_cache_genid)) {
-				void *ret = fle->object;
-
-				if (ret)
-					atomic_inc(fle->object_ref);
-				local_bh_enable();
-
-				return ret;
-			}
+		    flow_key_compare(key, &fle->key) == 0)
 			break;
-		}
 	}
 
-	if (!fle) {
+	if (unlikely(!fle)) {
 		if (fcp->hash_count > fc->high_watermark)
 			flow_cache_shrink(fc, fcp);
 
@@ -219,33 +222,39 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 			fle->object = NULL;
 			fcp->hash_count++;
 		}
+	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+		flo = fle->object;
+		if (!flo)
+			goto ret_object;
+		flo = flo->ops->get(flo);
+		if (flo)
+			goto ret_object;
+	} else if (fle->object) {
+		flo = fle->object;
+		flo->ops->delete(flo);
+		fle->object = NULL;
 	}
 
 nocache:
-	{
-		int err;
-		void *obj;
-		atomic_t *obj_ref;
-
-		err = resolver(net, key, family, dir, &obj, &obj_ref);
-
-		if (fle && !err) {
-			fle->genid = atomic_read(&flow_cache_genid);
-
-			if (fle->object)
-				atomic_dec(fle->object_ref);
-
-			fle->object = obj;
-			fle->object_ref = obj_ref;
-			if (obj)
-				atomic_inc(fle->object_ref);
-		}
-		local_bh_enable();
-
-		if (err)
-			obj = ERR_PTR(err);
-		return obj;
+	flo = NULL;
+	if (fle) {
+		flo = fle->object;
+		fle->object = NULL;
+	}
+	flo = resolver(net, key, family, dir, flo, ctx);
+	if (fle) {
+		fle->genid = atomic_read(&flow_cache_genid);
+		if (!IS_ERR(flo))
+			fle->object = flo;
+		else
+			fle->genid--;
+	} else {
+		if (flo && !IS_ERR(flo))
+			flo->ops->delete(flo);
 	}
+ret_object:
+	local_bh_enable();
+	return flo;
 }
 
 static void flow_cache_flush_tasklet(unsigned long data)
@@ -261,13 +270,12 @@ static void flow_cache_flush_tasklet(unsigned long data)
 
 		fle = fcp->hash_table[i];
 		for (; fle; fle = fle->next) {
-			unsigned genid = atomic_read(&flow_cache_genid);
-
-			if (!fle->object || fle->genid == genid)
+			if (flow_entry_valid(fle))
 				continue;
 
+			if (fle->object)
+				fle->object->ops->delete(fle->object);
 			fle->object = NULL;
-			atomic_dec(fle->object_ref);
 		}
 	}
 
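
The generation-count scheme above is worth spelling out: an entry is served only while its genid matches the global flow_cache_genid and its object passes ->check(); after a failed resolution, flow_cache_lookup() decrements fle->genid so that just that one entry is treated as stale. A tiny standalone sketch (invented names, plain C, the kernel's atomic_t reduced to an int):

/* Standalone illustration only -- not part of the patch. */
#include <stdio.h>

static int flow_cache_genid;	/* global generation counter */

struct entry {
	int genid;		/* generation recorded when the entry was filled */
};

static int entry_fresh(const struct entry *e)
{
	/* Bumping the global counter lazily invalidates every entry. */
	return e->genid == flow_cache_genid;
}

int main(void)
{
	struct entry e = { .genid = flow_cache_genid };

	printf("after fill:   %s\n", entry_fresh(&e) ? "fresh" : "stale");
	e.genid--;			/* resolver failed: force a re-resolve */
	printf("after error:  %s\n", entry_fresh(&e) ? "fresh" : "stale");
	e.genid = ++flow_cache_genid;	/* refill under the new generation */
	printf("after refill: %s\n", entry_fresh(&e) ? "fresh" : "stale");
	return 0;
}

In the kernel code this pairs with flow_entry_valid() above: an entry must both match the global genid and pass its object's ->check() before it is served or preserved by the shrinker.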
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 82789cf1c632..7722baeb140d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -216,6 +216,35 @@ expired:
 	xfrm_pol_put(xp);
 }
 
+static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
+{
+	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+	if (unlikely(pol->walk.dead))
+		flo = NULL;
+	else
+		xfrm_pol_hold(pol);
+
+	return flo;
+}
+
+static int xfrm_policy_flo_check(struct flow_cache_object *flo)
+{
+	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+	return !pol->walk.dead;
+}
+
+static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
+{
+	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
+}
+
+static const struct flow_cache_ops xfrm_policy_fc_ops = {
+	.get = xfrm_policy_flo_get,
+	.check = xfrm_policy_flo_check,
+	.delete = xfrm_policy_flo_delete,
+};
 
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 		atomic_set(&policy->refcnt, 1);
 		setup_timer(&policy->timer, xfrm_policy_timer,
 				(unsigned long)policy);
+		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
 }
@@ -269,9 +299,6 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 	if (del_timer(&policy->timer))
 		atomic_dec(&policy->refcnt);
 
-	if (atomic_read(&policy->refcnt) > 1)
-		flow_cache_flush();
-
 	xfrm_pol_put(policy);
 }
 
@@ -661,10 +688,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (ret && delete) {
-		atomic_inc(&flow_cache_genid);
+	if (ret && delete)
 		xfrm_policy_kill(ret);
-	}
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -703,10 +728,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (ret && delete) {
-		atomic_inc(&flow_cache_genid);
+	if (ret && delete)
 		xfrm_policy_kill(ret);
-	}
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
@@ -822,7 +845,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 	}
 	if (!cnt)
 		err = -ESRCH;
-	atomic_inc(&flow_cache_genid);
 out:
 	write_unlock_bh(&xfrm_policy_lock);
 	return err;
@@ -976,32 +998,35 @@ fail:
 	return ret;
 }
 
-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
-			      u8 dir, void **objp, atomic_t **obj_refp)
+static struct flow_cache_object *
+xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
 {
 	struct xfrm_policy *pol;
-	int err = 0;
+
+	if (old_obj)
+		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
 #ifdef CONFIG_XFRM_SUB_POLICY
 	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		pol = NULL;
-	}
-	if (pol || err)
-		goto end;
+	if (IS_ERR(pol))
+		return ERR_CAST(pol);
+	if (pol)
+		goto found;
 #endif
 	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		pol = NULL;
-	}
-#ifdef CONFIG_XFRM_SUB_POLICY
-end:
-#endif
-	if ((*objp = (void *) pol) != NULL)
-		*obj_refp = &pol->refcnt;
-	return err;
+	if (IS_ERR(pol))
+		return ERR_CAST(pol);
+	if (pol)
+		goto found;
+	return NULL;
+
+found:
+	/* Resolver returns two references:
+	 * one for cache and one for caller of flow_cache_lookup() */
+	xfrm_pol_hold(pol);
+
+	return &pol->flo;
 }
 
 static inline int policy_to_flow_dir(int dir)
@@ -1091,8 +1116,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 	pol = __xfrm_policy_unlink(pol, dir);
 	write_unlock_bh(&xfrm_policy_lock);
 	if (pol) {
-		if (dir < XFRM_POLICY_MAX)
-			atomic_inc(&flow_cache_genid);
 		xfrm_policy_kill(pol);
 		return 0;
 	}
@@ -1578,18 +1601,24 @@ restart:
 	}
 
 	if (!policy) {
+		struct flow_cache_object *flo;
+
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
-					   dir, xfrm_policy_lookup);
-		err = PTR_ERR(policy);
-		if (IS_ERR(policy)) {
+		flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
+					dir, xfrm_policy_lookup, NULL);
+		err = PTR_ERR(flo);
+		if (IS_ERR(flo)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 			goto dropdst;
 		}
+		if (flo)
+			policy = container_of(flo, struct xfrm_policy, flo);
+		else
+			policy = NULL;
 	}
 
 	if (!policy)
@@ -1939,9 +1968,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol)
-		pol = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup);
+	if (!pol) {
+		struct flow_cache_object *flo;
+
+		flo = flow_cache_lookup(net, &fl, family, fl_dir,
+					xfrm_policy_lookup, NULL);
+		if (IS_ERR_OR_NULL(flo))
+			pol = ERR_CAST(flo);
+		else
+			pol = container_of(flo, struct xfrm_policy, flo);
+	}
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);