diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r-- | net/ipv4/fib_trie.c | 1628 |
1 files changed, 772 insertions, 856 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a701405fab0b..b2dea4e5da77 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -43,7 +43,7 @@ | |||
43 | * 2 of the License, or (at your option) any later version. | 43 | * 2 of the License, or (at your option) any later version. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | #define VERSION "0.325" | 46 | #define VERSION "0.402" |
47 | 47 | ||
48 | #include <linux/config.h> | 48 | #include <linux/config.h> |
49 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
@@ -62,6 +62,7 @@ | |||
62 | #include <linux/netdevice.h> | 62 | #include <linux/netdevice.h> |
63 | #include <linux/if_arp.h> | 63 | #include <linux/if_arp.h> |
64 | #include <linux/proc_fs.h> | 64 | #include <linux/proc_fs.h> |
65 | #include <linux/rcupdate.h> | ||
65 | #include <linux/skbuff.h> | 66 | #include <linux/skbuff.h> |
66 | #include <linux/netlink.h> | 67 | #include <linux/netlink.h> |
67 | #include <linux/init.h> | 68 | #include <linux/init.h> |
@@ -77,56 +78,55 @@ | |||
77 | #undef CONFIG_IP_FIB_TRIE_STATS | 78 | #undef CONFIG_IP_FIB_TRIE_STATS |
78 | #define MAX_CHILDS 16384 | 79 | #define MAX_CHILDS 16384 |
79 | 80 | ||
80 | #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n))) | ||
81 | #define KEYLENGTH (8*sizeof(t_key)) | 81 | #define KEYLENGTH (8*sizeof(t_key)) |
82 | #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) | 82 | #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) |
83 | #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) | 83 | #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) |
84 | 84 | ||
85 | static DEFINE_RWLOCK(fib_lock); | ||
86 | |||
87 | typedef unsigned int t_key; | 85 | typedef unsigned int t_key; |
88 | 86 | ||
89 | #define T_TNODE 0 | 87 | #define T_TNODE 0 |
90 | #define T_LEAF 1 | 88 | #define T_LEAF 1 |
91 | #define NODE_TYPE_MASK 0x1UL | 89 | #define NODE_TYPE_MASK 0x1UL |
92 | #define NODE_PARENT(_node) \ | 90 | #define NODE_PARENT(node) \ |
93 | ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK)) | 91 | ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) |
94 | #define NODE_SET_PARENT(_node, _ptr) \ | 92 | |
95 | ((_node)->_parent = (((unsigned long)(_ptr)) | \ | 93 | #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) |
96 | ((_node)->_parent & NODE_TYPE_MASK))) | 94 | |
97 | #define NODE_INIT_PARENT(_node, _type) \ | 95 | #define NODE_SET_PARENT(node, ptr) \ |
98 | ((_node)->_parent = (_type)) | 96 | rcu_assign_pointer((node)->parent, \ |
99 | #define NODE_TYPE(_node) \ | 97 | ((unsigned long)(ptr)) | NODE_TYPE(node)) |
100 | ((_node)->_parent & NODE_TYPE_MASK) | 98 | |
101 | 99 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | |
102 | #define IS_TNODE(n) (!(n->_parent & T_LEAF)) | 100 | #define IS_LEAF(n) (n->parent & T_LEAF) |
103 | #define IS_LEAF(n) (n->_parent & T_LEAF) | ||
104 | 101 | ||
105 | struct node { | 102 | struct node { |
106 | t_key key; | 103 | t_key key; |
107 | unsigned long _parent; | 104 | unsigned long parent; |
108 | }; | 105 | }; |
109 | 106 | ||
110 | struct leaf { | 107 | struct leaf { |
111 | t_key key; | 108 | t_key key; |
112 | unsigned long _parent; | 109 | unsigned long parent; |
113 | struct hlist_head list; | 110 | struct hlist_head list; |
111 | struct rcu_head rcu; | ||
114 | }; | 112 | }; |
115 | 113 | ||
116 | struct leaf_info { | 114 | struct leaf_info { |
117 | struct hlist_node hlist; | 115 | struct hlist_node hlist; |
116 | struct rcu_head rcu; | ||
118 | int plen; | 117 | int plen; |
119 | struct list_head falh; | 118 | struct list_head falh; |
120 | }; | 119 | }; |
121 | 120 | ||
122 | struct tnode { | 121 | struct tnode { |
123 | t_key key; | 122 | t_key key; |
124 | unsigned long _parent; | 123 | unsigned long parent; |
125 | unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ | 124 | unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ |
126 | unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ | 125 | unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ |
127 | unsigned short full_children; /* KEYLENGTH bits needed */ | 126 | unsigned short full_children; /* KEYLENGTH bits needed */ |
128 | unsigned short empty_children; /* KEYLENGTH bits needed */ | 127 | unsigned short empty_children; /* KEYLENGTH bits needed */ |
129 | struct node *child[0]; | 128 | struct rcu_head rcu; |
129 | struct node *child[0]; | ||
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -150,77 +150,45 @@ struct trie_stat { | |||
150 | }; | 150 | }; |
151 | 151 | ||
152 | struct trie { | 152 | struct trie { |
153 | struct node *trie; | 153 | struct node *trie; |
154 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 154 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
155 | struct trie_use_stats stats; | 155 | struct trie_use_stats stats; |
156 | #endif | 156 | #endif |
157 | int size; | 157 | int size; |
158 | unsigned int revision; | 158 | unsigned int revision; |
159 | }; | 159 | }; |
160 | 160 | ||
161 | static int trie_debug = 0; | ||
162 | |||
163 | static int tnode_full(struct tnode *tn, struct node *n); | ||
164 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | 161 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); |
165 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); | 162 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); |
166 | static int tnode_child_length(struct tnode *tn); | ||
167 | static struct node *resize(struct trie *t, struct tnode *tn); | 163 | static struct node *resize(struct trie *t, struct tnode *tn); |
168 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err); | 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
169 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err); | 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
170 | static void tnode_free(struct tnode *tn); | 166 | static void tnode_free(struct tnode *tn); |
171 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); | 167 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); |
172 | extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); | ||
173 | extern int fib_detect_death(struct fib_info *fi, int order, | ||
174 | struct fib_info **last_resort, int *last_idx, int *dflt); | ||
175 | |||
176 | extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, | ||
177 | struct nlmsghdr *n, struct netlink_skb_parms *req); | ||
178 | 168 | ||
179 | static kmem_cache_t *fn_alias_kmem; | 169 | static kmem_cache_t *fn_alias_kmem __read_mostly; |
180 | static struct trie *trie_local = NULL, *trie_main = NULL; | 170 | static struct trie *trie_local = NULL, *trie_main = NULL; |
181 | 171 | ||
182 | static void trie_bug(char *err) | 172 | |
183 | { | 173 | /* rcu_read_lock needs to be hold by caller from readside */ |
184 | printk("Trie Bug: %s\n", err); | ||
185 | BUG(); | ||
186 | } | ||
187 | 174 | ||
188 | static inline struct node *tnode_get_child(struct tnode *tn, int i) | 175 | static inline struct node *tnode_get_child(struct tnode *tn, int i) |
189 | { | 176 | { |
190 | if (i >= 1<<tn->bits) | 177 | BUG_ON(i >= 1 << tn->bits); |
191 | trie_bug("tnode_get_child"); | ||
192 | 178 | ||
193 | return tn->child[i]; | 179 | return rcu_dereference(tn->child[i]); |
194 | } | 180 | } |
195 | 181 | ||
196 | static inline int tnode_child_length(struct tnode *tn) | 182 | static inline int tnode_child_length(const struct tnode *tn) |
197 | { | 183 | { |
198 | return 1<<tn->bits; | 184 | return 1 << tn->bits; |
199 | } | 185 | } |
200 | 186 | ||
201 | /* | ||
202 | _________________________________________________________________ | ||
203 | | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | | ||
204 | ---------------------------------------------------------------- | ||
205 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | ||
206 | |||
207 | _________________________________________________________________ | ||
208 | | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | | ||
209 | ----------------------------------------------------------------- | ||
210 | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | ||
211 | |||
212 | tp->pos = 7 | ||
213 | tp->bits = 3 | ||
214 | n->pos = 15 | ||
215 | n->bits=4 | ||
216 | KEYLENGTH=32 | ||
217 | */ | ||
218 | |||
219 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) | 187 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) |
220 | { | 188 | { |
221 | if (offset < KEYLENGTH) | 189 | if (offset < KEYLENGTH) |
222 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); | 190 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); |
223 | else | 191 | else |
224 | return 0; | 192 | return 0; |
225 | } | 193 | } |
226 | 194 | ||
@@ -233,8 +201,8 @@ static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) | |||
233 | { | 201 | { |
234 | if (bits == 0 || offset >= KEYLENGTH) | 202 | if (bits == 0 || offset >= KEYLENGTH) |
235 | return 1; | 203 | return 1; |
236 | bits = bits > KEYLENGTH ? KEYLENGTH : bits; | 204 | bits = bits > KEYLENGTH ? KEYLENGTH : bits; |
237 | return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; | 205 | return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; |
238 | } | 206 | } |
239 | 207 | ||
240 | static inline int tkey_mismatch(t_key a, int offset, t_key b) | 208 | static inline int tkey_mismatch(t_key a, int offset, t_key b) |
@@ -249,14 +217,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) | |||
249 | return i; | 217 | return i; |
250 | } | 218 | } |
251 | 219 | ||
252 | /* Candiate for fib_semantics */ | ||
253 | |||
254 | static void fn_free_alias(struct fib_alias *fa) | ||
255 | { | ||
256 | fib_release_info(fa->fa_info); | ||
257 | kmem_cache_free(fn_alias_kmem, fa); | ||
258 | } | ||
259 | |||
260 | /* | 220 | /* |
261 | To understand this stuff, an understanding of keys and all their bits is | 221 | To understand this stuff, an understanding of keys and all their bits is |
262 | necessary. Every node in the trie has a key associated with it, but not | 222 | necessary. Every node in the trie has a key associated with it, but not |
@@ -295,7 +255,7 @@ static void fn_free_alias(struct fib_alias *fa) | |||
295 | tp->pos = 7 | 255 | tp->pos = 7 |
296 | tp->bits = 3 | 256 | tp->bits = 3 |
297 | n->pos = 15 | 257 | n->pos = 15 |
298 | n->bits=4 | 258 | n->bits = 4 |
299 | 259 | ||
300 | First, let's just ignore the bits that come before the parent tp, that is | 260 | First, let's just ignore the bits that come before the parent tp, that is |
301 | the bits from 0 to (tp->pos-1). They are *known* but at this point we do | 261 | the bits from 0 to (tp->pos-1). They are *known* but at this point we do |
@@ -320,60 +280,65 @@ static void fn_free_alias(struct fib_alias *fa) | |||
320 | 280 | ||
321 | */ | 281 | */ |
322 | 282 | ||
323 | static void check_tnode(struct tnode *tn) | 283 | static inline void check_tnode(const struct tnode *tn) |
324 | { | 284 | { |
325 | if (tn && tn->pos+tn->bits > 32) { | 285 | WARN_ON(tn && tn->pos+tn->bits > 32); |
326 | printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits); | ||
327 | } | ||
328 | } | 286 | } |
329 | 287 | ||
330 | static int halve_threshold = 25; | 288 | static int halve_threshold = 25; |
331 | static int inflate_threshold = 50; | 289 | static int inflate_threshold = 50; |
332 | 290 | ||
333 | static struct leaf *leaf_new(void) | 291 | |
292 | static void __alias_free_mem(struct rcu_head *head) | ||
334 | { | 293 | { |
335 | struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); | 294 | struct fib_alias *fa = container_of(head, struct fib_alias, rcu); |
336 | if (l) { | 295 | kmem_cache_free(fn_alias_kmem, fa); |
337 | NODE_INIT_PARENT(l, T_LEAF); | ||
338 | INIT_HLIST_HEAD(&l->list); | ||
339 | } | ||
340 | return l; | ||
341 | } | 296 | } |
342 | 297 | ||
343 | static struct leaf_info *leaf_info_new(int plen) | 298 | static inline void alias_free_mem_rcu(struct fib_alias *fa) |
344 | { | 299 | { |
345 | struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); | 300 | call_rcu(&fa->rcu, __alias_free_mem); |
346 | if (li) { | 301 | } |
347 | li->plen = plen; | 302 | |
348 | INIT_LIST_HEAD(&li->falh); | 303 | static void __leaf_free_rcu(struct rcu_head *head) |
349 | } | 304 | { |
350 | return li; | 305 | kfree(container_of(head, struct leaf, rcu)); |
306 | } | ||
307 | |||
308 | static inline void free_leaf(struct leaf *leaf) | ||
309 | { | ||
310 | call_rcu(&leaf->rcu, __leaf_free_rcu); | ||
351 | } | 311 | } |
352 | 312 | ||
353 | static inline void free_leaf(struct leaf *l) | 313 | static void __leaf_info_free_rcu(struct rcu_head *head) |
354 | { | 314 | { |
355 | kfree(l); | 315 | kfree(container_of(head, struct leaf_info, rcu)); |
356 | } | 316 | } |
357 | 317 | ||
358 | static inline void free_leaf_info(struct leaf_info *li) | 318 | static inline void free_leaf_info(struct leaf_info *leaf) |
359 | { | 319 | { |
360 | kfree(li); | 320 | call_rcu(&leaf->rcu, __leaf_info_free_rcu); |
361 | } | 321 | } |
362 | 322 | ||
363 | static struct tnode *tnode_alloc(unsigned int size) | 323 | static struct tnode *tnode_alloc(unsigned int size) |
364 | { | 324 | { |
365 | if (size <= PAGE_SIZE) { | 325 | struct page *pages; |
366 | return kmalloc(size, GFP_KERNEL); | 326 | |
367 | } else { | 327 | if (size <= PAGE_SIZE) |
368 | return (struct tnode *) | 328 | return kcalloc(size, 1, GFP_KERNEL); |
369 | __get_free_pages(GFP_KERNEL, get_order(size)); | 329 | |
370 | } | 330 | pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); |
331 | if (!pages) | ||
332 | return NULL; | ||
333 | |||
334 | return page_address(pages); | ||
371 | } | 335 | } |
372 | 336 | ||
373 | static void __tnode_free(struct tnode *tn) | 337 | static void __tnode_free_rcu(struct rcu_head *head) |
374 | { | 338 | { |
339 | struct tnode *tn = container_of(head, struct tnode, rcu); | ||
375 | unsigned int size = sizeof(struct tnode) + | 340 | unsigned int size = sizeof(struct tnode) + |
376 | (1<<tn->bits) * sizeof(struct node *); | 341 | (1 << tn->bits) * sizeof(struct node *); |
377 | 342 | ||
378 | if (size <= PAGE_SIZE) | 343 | if (size <= PAGE_SIZE) |
379 | kfree(tn); | 344 | kfree(tn); |
@@ -381,15 +346,40 @@ static void __tnode_free(struct tnode *tn) | |||
381 | free_pages((unsigned long)tn, get_order(size)); | 346 | free_pages((unsigned long)tn, get_order(size)); |
382 | } | 347 | } |
383 | 348 | ||
349 | static inline void tnode_free(struct tnode *tn) | ||
350 | { | ||
351 | call_rcu(&tn->rcu, __tnode_free_rcu); | ||
352 | } | ||
353 | |||
354 | static struct leaf *leaf_new(void) | ||
355 | { | ||
356 | struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); | ||
357 | if (l) { | ||
358 | l->parent = T_LEAF; | ||
359 | INIT_HLIST_HEAD(&l->list); | ||
360 | } | ||
361 | return l; | ||
362 | } | ||
363 | |||
364 | static struct leaf_info *leaf_info_new(int plen) | ||
365 | { | ||
366 | struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); | ||
367 | if (li) { | ||
368 | li->plen = plen; | ||
369 | INIT_LIST_HEAD(&li->falh); | ||
370 | } | ||
371 | return li; | ||
372 | } | ||
373 | |||
384 | static struct tnode* tnode_new(t_key key, int pos, int bits) | 374 | static struct tnode* tnode_new(t_key key, int pos, int bits) |
385 | { | 375 | { |
386 | int nchildren = 1<<bits; | 376 | int nchildren = 1<<bits; |
387 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); | 377 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); |
388 | struct tnode *tn = tnode_alloc(sz); | 378 | struct tnode *tn = tnode_alloc(sz); |
389 | 379 | ||
390 | if (tn) { | 380 | if (tn) { |
391 | memset(tn, 0, sz); | 381 | memset(tn, 0, sz); |
392 | NODE_INIT_PARENT(tn, T_TNODE); | 382 | tn->parent = T_TNODE; |
393 | tn->pos = pos; | 383 | tn->pos = pos; |
394 | tn->bits = bits; | 384 | tn->bits = bits; |
395 | tn->key = key; | 385 | tn->key = key; |
@@ -397,38 +387,17 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) | |||
397 | tn->empty_children = 1<<bits; | 387 | tn->empty_children = 1<<bits; |
398 | } | 388 | } |
399 | 389 | ||
400 | if (trie_debug > 0) | 390 | pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), |
401 | printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), | 391 | (unsigned int) (sizeof(struct node) * 1<<bits)); |
402 | (unsigned int) (sizeof(struct node) * 1<<bits)); | ||
403 | return tn; | 392 | return tn; |
404 | } | 393 | } |
405 | 394 | ||
406 | static void tnode_free(struct tnode *tn) | ||
407 | { | ||
408 | if (!tn) { | ||
409 | trie_bug("tnode_free\n"); | ||
410 | } | ||
411 | if (IS_LEAF(tn)) { | ||
412 | free_leaf((struct leaf *)tn); | ||
413 | if (trie_debug > 0 ) | ||
414 | printk("FL %p \n", tn); | ||
415 | } | ||
416 | else if (IS_TNODE(tn)) { | ||
417 | __tnode_free(tn); | ||
418 | if (trie_debug > 0 ) | ||
419 | printk("FT %p \n", tn); | ||
420 | } | ||
421 | else { | ||
422 | trie_bug("tnode_free\n"); | ||
423 | } | ||
424 | } | ||
425 | |||
426 | /* | 395 | /* |
427 | * Check whether a tnode 'n' is "full", i.e. it is an internal node | 396 | * Check whether a tnode 'n' is "full", i.e. it is an internal node |
428 | * and no bits are skipped. See discussion in dyntree paper p. 6 | 397 | * and no bits are skipped. See discussion in dyntree paper p. 6 |
429 | */ | 398 | */ |
430 | 399 | ||
431 | static inline int tnode_full(struct tnode *tn, struct node *n) | 400 | static inline int tnode_full(const struct tnode *tn, const struct node *n) |
432 | { | 401 | { |
433 | if (n == NULL || IS_LEAF(n)) | 402 | if (n == NULL || IS_LEAF(n)) |
434 | return 0; | 403 | return 0; |
@@ -448,15 +417,11 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod | |||
448 | 417 | ||
449 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) | 418 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) |
450 | { | 419 | { |
451 | struct node *chi; | 420 | struct node *chi = tn->child[i]; |
452 | int isfull; | 421 | int isfull; |
453 | 422 | ||
454 | if (i >= 1<<tn->bits) { | 423 | BUG_ON(i >= 1<<tn->bits); |
455 | printk("bits=%d, i=%d\n", tn->bits, i); | 424 | |
456 | trie_bug("tnode_put_child_reorg bits"); | ||
457 | } | ||
458 | write_lock_bh(&fib_lock); | ||
459 | chi = tn->child[i]; | ||
460 | 425 | ||
461 | /* update emptyChildren */ | 426 | /* update emptyChildren */ |
462 | if (n == NULL && chi != NULL) | 427 | if (n == NULL && chi != NULL) |
@@ -465,33 +430,32 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w | |||
465 | tn->empty_children--; | 430 | tn->empty_children--; |
466 | 431 | ||
467 | /* update fullChildren */ | 432 | /* update fullChildren */ |
468 | if (wasfull == -1) | 433 | if (wasfull == -1) |
469 | wasfull = tnode_full(tn, chi); | 434 | wasfull = tnode_full(tn, chi); |
470 | 435 | ||
471 | isfull = tnode_full(tn, n); | 436 | isfull = tnode_full(tn, n); |
472 | if (wasfull && !isfull) | 437 | if (wasfull && !isfull) |
473 | tn->full_children--; | 438 | tn->full_children--; |
474 | |||
475 | else if (!wasfull && isfull) | 439 | else if (!wasfull && isfull) |
476 | tn->full_children++; | 440 | tn->full_children++; |
441 | |||
477 | if (n) | 442 | if (n) |
478 | NODE_SET_PARENT(n, tn); | 443 | NODE_SET_PARENT(n, tn); |
479 | 444 | ||
480 | tn->child[i] = n; | 445 | rcu_assign_pointer(tn->child[i], n); |
481 | write_unlock_bh(&fib_lock); | ||
482 | } | 446 | } |
483 | 447 | ||
484 | static struct node *resize(struct trie *t, struct tnode *tn) | 448 | static struct node *resize(struct trie *t, struct tnode *tn) |
485 | { | 449 | { |
486 | int i; | 450 | int i; |
487 | int err = 0; | 451 | int err = 0; |
452 | struct tnode *old_tn; | ||
488 | 453 | ||
489 | if (!tn) | 454 | if (!tn) |
490 | return NULL; | 455 | return NULL; |
491 | 456 | ||
492 | if (trie_debug) | 457 | pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", |
493 | printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n", | 458 | tn, inflate_threshold, halve_threshold); |
494 | tn, inflate_threshold, halve_threshold); | ||
495 | 459 | ||
496 | /* No children */ | 460 | /* No children */ |
497 | if (tn->empty_children == tnode_child_length(tn)) { | 461 | if (tn->empty_children == tnode_child_length(tn)) { |
@@ -501,20 +465,16 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
501 | /* One child */ | 465 | /* One child */ |
502 | if (tn->empty_children == tnode_child_length(tn) - 1) | 466 | if (tn->empty_children == tnode_child_length(tn) - 1) |
503 | for (i = 0; i < tnode_child_length(tn); i++) { | 467 | for (i = 0; i < tnode_child_length(tn); i++) { |
468 | struct node *n; | ||
504 | 469 | ||
505 | write_lock_bh(&fib_lock); | 470 | n = tn->child[i]; |
506 | if (tn->child[i] != NULL) { | 471 | if (!n) |
507 | 472 | continue; | |
508 | /* compress one level */ | ||
509 | struct node *n = tn->child[i]; | ||
510 | if (n) | ||
511 | NODE_INIT_PARENT(n, NODE_TYPE(n)); | ||
512 | 473 | ||
513 | write_unlock_bh(&fib_lock); | 474 | /* compress one level */ |
514 | tnode_free(tn); | 475 | NODE_SET_PARENT(n, NULL); |
515 | return n; | 476 | tnode_free(tn); |
516 | } | 477 | return n; |
517 | write_unlock_bh(&fib_lock); | ||
518 | } | 478 | } |
519 | /* | 479 | /* |
520 | * Double as long as the resulting node has a number of | 480 | * Double as long as the resulting node has a number of |
@@ -566,16 +526,16 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
566 | * | 526 | * |
567 | * expand not_to_be_doubled and to_be_doubled, and shorten: | 527 | * expand not_to_be_doubled and to_be_doubled, and shorten: |
568 | * 100 * (tnode_child_length(tn) - tn->empty_children + | 528 | * 100 * (tnode_child_length(tn) - tn->empty_children + |
569 | * tn->full_children ) >= inflate_threshold * new_child_length | 529 | * tn->full_children) >= inflate_threshold * new_child_length |
570 | * | 530 | * |
571 | * expand new_child_length: | 531 | * expand new_child_length: |
572 | * 100 * (tnode_child_length(tn) - tn->empty_children + | 532 | * 100 * (tnode_child_length(tn) - tn->empty_children + |
573 | * tn->full_children ) >= | 533 | * tn->full_children) >= |
574 | * inflate_threshold * tnode_child_length(tn) * 2 | 534 | * inflate_threshold * tnode_child_length(tn) * 2 |
575 | * | 535 | * |
576 | * shorten again: | 536 | * shorten again: |
577 | * 50 * (tn->full_children + tnode_child_length(tn) - | 537 | * 50 * (tn->full_children + tnode_child_length(tn) - |
578 | * tn->empty_children ) >= inflate_threshold * | 538 | * tn->empty_children) >= inflate_threshold * |
579 | * tnode_child_length(tn) | 539 | * tnode_child_length(tn) |
580 | * | 540 | * |
581 | */ | 541 | */ |
@@ -587,9 +547,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
587 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= | 547 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= |
588 | inflate_threshold * tnode_child_length(tn))) { | 548 | inflate_threshold * tnode_child_length(tn))) { |
589 | 549 | ||
590 | tn = inflate(t, tn, &err); | 550 | old_tn = tn; |
591 | 551 | tn = inflate(t, tn); | |
592 | if (err) { | 552 | if (IS_ERR(tn)) { |
553 | tn = old_tn; | ||
593 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 554 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
594 | t->stats.resize_node_skipped++; | 555 | t->stats.resize_node_skipped++; |
595 | #endif | 556 | #endif |
@@ -609,9 +570,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
609 | 100 * (tnode_child_length(tn) - tn->empty_children) < | 570 | 100 * (tnode_child_length(tn) - tn->empty_children) < |
610 | halve_threshold * tnode_child_length(tn)) { | 571 | halve_threshold * tnode_child_length(tn)) { |
611 | 572 | ||
612 | tn = halve(t, tn, &err); | 573 | old_tn = tn; |
613 | 574 | tn = halve(t, tn); | |
614 | if (err) { | 575 | if (IS_ERR(tn)) { |
576 | tn = old_tn; | ||
615 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 577 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
616 | t->stats.resize_node_skipped++; | 578 | t->stats.resize_node_skipped++; |
617 | #endif | 579 | #endif |
@@ -621,44 +583,37 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
621 | 583 | ||
622 | 584 | ||
623 | /* Only one child remains */ | 585 | /* Only one child remains */ |
624 | |||
625 | if (tn->empty_children == tnode_child_length(tn) - 1) | 586 | if (tn->empty_children == tnode_child_length(tn) - 1) |
626 | for (i = 0; i < tnode_child_length(tn); i++) { | 587 | for (i = 0; i < tnode_child_length(tn); i++) { |
627 | 588 | struct node *n; | |
628 | write_lock_bh(&fib_lock); | 589 | |
629 | if (tn->child[i] != NULL) { | 590 | n = tn->child[i]; |
630 | /* compress one level */ | 591 | if (!n) |
631 | struct node *n = tn->child[i]; | 592 | continue; |
632 | 593 | ||
633 | if (n) | 594 | /* compress one level */ |
634 | NODE_INIT_PARENT(n, NODE_TYPE(n)); | 595 | |
635 | 596 | NODE_SET_PARENT(n, NULL); | |
636 | write_unlock_bh(&fib_lock); | 597 | tnode_free(tn); |
637 | tnode_free(tn); | 598 | return n; |
638 | return n; | ||
639 | } | ||
640 | write_unlock_bh(&fib_lock); | ||
641 | } | 599 | } |
642 | 600 | ||
643 | return (struct node *) tn; | 601 | return (struct node *) tn; |
644 | } | 602 | } |
645 | 603 | ||
646 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | 604 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
647 | { | 605 | { |
648 | struct tnode *inode; | 606 | struct tnode *inode; |
649 | struct tnode *oldtnode = tn; | 607 | struct tnode *oldtnode = tn; |
650 | int olen = tnode_child_length(tn); | 608 | int olen = tnode_child_length(tn); |
651 | int i; | 609 | int i; |
652 | 610 | ||
653 | if (trie_debug) | 611 | pr_debug("In inflate\n"); |
654 | printk("In inflate\n"); | ||
655 | 612 | ||
656 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); | 613 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); |
657 | 614 | ||
658 | if (!tn) { | 615 | if (!tn) |
659 | *err = -ENOMEM; | 616 | return ERR_PTR(-ENOMEM); |
660 | return oldtnode; | ||
661 | } | ||
662 | 617 | ||
663 | /* | 618 | /* |
664 | * Preallocate and store tnodes before the actual work so we | 619 | * Preallocate and store tnodes before the actual work so we |
@@ -666,8 +621,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | |||
666 | * fails. In case of failure we return the oldnode and inflate | 621 | * fails. In case of failure we return the oldnode and inflate |
667 | * of tnode is ignored. | 622 | * of tnode is ignored. |
668 | */ | 623 | */ |
669 | 624 | ||
670 | for(i = 0; i < olen; i++) { | 625 | for (i = 0; i < olen; i++) { |
671 | struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); | 626 | struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); |
672 | 627 | ||
673 | if (inode && | 628 | if (inode && |
@@ -675,46 +630,30 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | |||
675 | inode->pos == oldtnode->pos + oldtnode->bits && | 630 | inode->pos == oldtnode->pos + oldtnode->bits && |
676 | inode->bits > 1) { | 631 | inode->bits > 1) { |
677 | struct tnode *left, *right; | 632 | struct tnode *left, *right; |
678 | |||
679 | t_key m = TKEY_GET_MASK(inode->pos, 1); | 633 | t_key m = TKEY_GET_MASK(inode->pos, 1); |
680 | 634 | ||
681 | left = tnode_new(inode->key&(~m), inode->pos + 1, | 635 | left = tnode_new(inode->key&(~m), inode->pos + 1, |
682 | inode->bits - 1); | 636 | inode->bits - 1); |
637 | if (!left) | ||
638 | goto nomem; | ||
683 | 639 | ||
684 | if (!left) { | ||
685 | *err = -ENOMEM; | ||
686 | break; | ||
687 | } | ||
688 | |||
689 | right = tnode_new(inode->key|m, inode->pos + 1, | 640 | right = tnode_new(inode->key|m, inode->pos + 1, |
690 | inode->bits - 1); | 641 | inode->bits - 1); |
691 | 642 | ||
692 | if (!right) { | 643 | if (!right) { |
693 | *err = -ENOMEM; | 644 | tnode_free(left); |
694 | break; | 645 | goto nomem; |
695 | } | 646 | } |
696 | 647 | ||
697 | put_child(t, tn, 2*i, (struct node *) left); | 648 | put_child(t, tn, 2*i, (struct node *) left); |
698 | put_child(t, tn, 2*i+1, (struct node *) right); | 649 | put_child(t, tn, 2*i+1, (struct node *) right); |
699 | } | 650 | } |
700 | } | 651 | } |
701 | 652 | ||
702 | if (*err) { | 653 | for (i = 0; i < olen; i++) { |
703 | int size = tnode_child_length(tn); | ||
704 | int j; | ||
705 | |||
706 | for(j = 0; j < size; j++) | ||
707 | if (tn->child[j]) | ||
708 | tnode_free((struct tnode *)tn->child[j]); | ||
709 | |||
710 | tnode_free(tn); | ||
711 | |||
712 | *err = -ENOMEM; | ||
713 | return oldtnode; | ||
714 | } | ||
715 | |||
716 | for(i = 0; i < olen; i++) { | ||
717 | struct node *node = tnode_get_child(oldtnode, i); | 654 | struct node *node = tnode_get_child(oldtnode, i); |
655 | struct tnode *left, *right; | ||
656 | int size, j; | ||
718 | 657 | ||
719 | /* An empty child */ | 658 | /* An empty child */ |
720 | if (node == NULL) | 659 | if (node == NULL) |
@@ -740,76 +679,82 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | |||
740 | put_child(t, tn, 2*i+1, inode->child[1]); | 679 | put_child(t, tn, 2*i+1, inode->child[1]); |
741 | 680 | ||
742 | tnode_free(inode); | 681 | tnode_free(inode); |
682 | continue; | ||
743 | } | 683 | } |
744 | 684 | ||
745 | /* An internal node with more than two children */ | 685 | /* An internal node with more than two children */ |
746 | else { | 686 | |
747 | struct tnode *left, *right; | 687 | /* We will replace this node 'inode' with two new |
748 | int size, j; | 688 | * ones, 'left' and 'right', each with half of the |
749 | 689 | * original children. The two new nodes will have | |
750 | /* We will replace this node 'inode' with two new | 690 | * a position one bit further down the key and this |
751 | * ones, 'left' and 'right', each with half of the | 691 | * means that the "significant" part of their keys |
752 | * original children. The two new nodes will have | 692 | * (see the discussion near the top of this file) |
753 | * a position one bit further down the key and this | 693 | * will differ by one bit, which will be "0" in |
754 | * means that the "significant" part of their keys | 694 | * left's key and "1" in right's key. Since we are |
755 | * (see the discussion near the top of this file) | 695 | * moving the key position by one step, the bit that |
756 | * will differ by one bit, which will be "0" in | 696 | * we are moving away from - the bit at position |
757 | * left's key and "1" in right's key. Since we are | 697 | * (inode->pos) - is the one that will differ between |
758 | * moving the key position by one step, the bit that | 698 | * left and right. So... we synthesize that bit in the |
759 | * we are moving away from - the bit at position | 699 | * two new keys. |
760 | * (inode->pos) - is the one that will differ between | 700 | * The mask 'm' below will be a single "one" bit at |
761 | * left and right. So... we synthesize that bit in the | 701 | * the position (inode->pos) |
762 | * two new keys. | 702 | */ |
763 | * The mask 'm' below will be a single "one" bit at | ||
764 | * the position (inode->pos) | ||
765 | */ | ||
766 | |||
767 | /* Use the old key, but set the new significant | ||
768 | * bit to zero. | ||
769 | */ | ||
770 | 703 | ||
771 | left = (struct tnode *) tnode_get_child(tn, 2*i); | 704 | /* Use the old key, but set the new significant |
772 | put_child(t, tn, 2*i, NULL); | 705 | * bit to zero. |
706 | */ | ||
773 | 707 | ||
774 | if (!left) | 708 | left = (struct tnode *) tnode_get_child(tn, 2*i); |
775 | BUG(); | 709 | put_child(t, tn, 2*i, NULL); |
776 | 710 | ||
777 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); | 711 | BUG_ON(!left); |
778 | put_child(t, tn, 2*i+1, NULL); | ||
779 | 712 | ||
780 | if (!right) | 713 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); |
781 | BUG(); | 714 | put_child(t, tn, 2*i+1, NULL); |
782 | 715 | ||
783 | size = tnode_child_length(left); | 716 | BUG_ON(!right); |
784 | for(j = 0; j < size; j++) { | ||
785 | put_child(t, left, j, inode->child[j]); | ||
786 | put_child(t, right, j, inode->child[j + size]); | ||
787 | } | ||
788 | put_child(t, tn, 2*i, resize(t, left)); | ||
789 | put_child(t, tn, 2*i+1, resize(t, right)); | ||
790 | 717 | ||
791 | tnode_free(inode); | 718 | size = tnode_child_length(left); |
719 | for (j = 0; j < size; j++) { | ||
720 | put_child(t, left, j, inode->child[j]); | ||
721 | put_child(t, right, j, inode->child[j + size]); | ||
792 | } | 722 | } |
723 | put_child(t, tn, 2*i, resize(t, left)); | ||
724 | put_child(t, tn, 2*i+1, resize(t, right)); | ||
725 | |||
726 | tnode_free(inode); | ||
793 | } | 727 | } |
794 | tnode_free(oldtnode); | 728 | tnode_free(oldtnode); |
795 | return tn; | 729 | return tn; |
730 | nomem: | ||
731 | { | ||
732 | int size = tnode_child_length(tn); | ||
733 | int j; | ||
734 | |||
735 | for (j = 0; j < size; j++) | ||
736 | if (tn->child[j]) | ||
737 | tnode_free((struct tnode *)tn->child[j]); | ||
738 | |||
739 | tnode_free(tn); | ||
740 | |||
741 | return ERR_PTR(-ENOMEM); | ||
742 | } | ||
796 | } | 743 | } |
797 | 744 | ||
798 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) | 745 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
799 | { | 746 | { |
800 | struct tnode *oldtnode = tn; | 747 | struct tnode *oldtnode = tn; |
801 | struct node *left, *right; | 748 | struct node *left, *right; |
802 | int i; | 749 | int i; |
803 | int olen = tnode_child_length(tn); | 750 | int olen = tnode_child_length(tn); |
804 | 751 | ||
805 | if (trie_debug) printk("In halve\n"); | 752 | pr_debug("In halve\n"); |
806 | 753 | ||
807 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); | 754 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); |
808 | 755 | ||
809 | if (!tn) { | 756 | if (!tn) |
810 | *err = -ENOMEM; | 757 | return ERR_PTR(-ENOMEM); |
811 | return oldtnode; | ||
812 | } | ||
813 | 758 | ||
814 | /* | 759 | /* |
815 | * Preallocate and store tnodes before the actual work so we | 760 | * Preallocate and store tnodes before the actual work so we |
@@ -818,38 +763,27 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) | |||
818 | * of tnode is ignored. | 763 | * of tnode is ignored. |
819 | */ | 764 | */ |
820 | 765 | ||
821 | for(i = 0; i < olen; i += 2) { | 766 | for (i = 0; i < olen; i += 2) { |
822 | left = tnode_get_child(oldtnode, i); | 767 | left = tnode_get_child(oldtnode, i); |
823 | right = tnode_get_child(oldtnode, i+1); | 768 | right = tnode_get_child(oldtnode, i+1); |
824 | 769 | ||
825 | /* Two nonempty children */ | 770 | /* Two nonempty children */ |
826 | if (left && right) { | 771 | if (left && right) { |
827 | struct tnode *newBinNode = | 772 | struct tnode *newn; |
828 | tnode_new(left->key, tn->pos + tn->bits, 1); | ||
829 | 773 | ||
830 | if (!newBinNode) { | 774 | newn = tnode_new(left->key, tn->pos + tn->bits, 1); |
831 | *err = -ENOMEM; | ||
832 | break; | ||
833 | } | ||
834 | put_child(t, tn, i/2, (struct node *)newBinNode); | ||
835 | } | ||
836 | } | ||
837 | 775 | ||
838 | if (*err) { | 776 | if (!newn) |
839 | int size = tnode_child_length(tn); | 777 | goto nomem; |
840 | int j; | ||
841 | 778 | ||
842 | for(j = 0; j < size; j++) | 779 | put_child(t, tn, i/2, (struct node *)newn); |
843 | if (tn->child[j]) | 780 | } |
844 | tnode_free((struct tnode *)tn->child[j]); | ||
845 | 781 | ||
846 | tnode_free(tn); | ||
847 | |||
848 | *err = -ENOMEM; | ||
849 | return oldtnode; | ||
850 | } | 782 | } |
851 | 783 | ||
852 | for(i = 0; i < olen; i += 2) { | 784 | for (i = 0; i < olen; i += 2) { |
785 | struct tnode *newBinNode; | ||
786 | |||
853 | left = tnode_get_child(oldtnode, i); | 787 | left = tnode_get_child(oldtnode, i); |
854 | right = tnode_get_child(oldtnode, i+1); | 788 | right = tnode_get_child(oldtnode, i+1); |
855 | 789 | ||
@@ -858,88 +792,99 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) | |||
858 | if (right == NULL) /* Both are empty */ | 792 | if (right == NULL) /* Both are empty */ |
859 | continue; | 793 | continue; |
860 | put_child(t, tn, i/2, right); | 794 | put_child(t, tn, i/2, right); |
861 | } else if (right == NULL) | 795 | continue; |
796 | } | ||
797 | |||
798 | if (right == NULL) { | ||
862 | put_child(t, tn, i/2, left); | 799 | put_child(t, tn, i/2, left); |
800 | continue; | ||
801 | } | ||
863 | 802 | ||
864 | /* Two nonempty children */ | 803 | /* Two nonempty children */ |
865 | else { | 804 | newBinNode = (struct tnode *) tnode_get_child(tn, i/2); |
866 | struct tnode *newBinNode = | 805 | put_child(t, tn, i/2, NULL); |
867 | (struct tnode *) tnode_get_child(tn, i/2); | 806 | put_child(t, newBinNode, 0, left); |
868 | put_child(t, tn, i/2, NULL); | 807 | put_child(t, newBinNode, 1, right); |
869 | 808 | put_child(t, tn, i/2, resize(t, newBinNode)); | |
870 | if (!newBinNode) | ||
871 | BUG(); | ||
872 | |||
873 | put_child(t, newBinNode, 0, left); | ||
874 | put_child(t, newBinNode, 1, right); | ||
875 | put_child(t, tn, i/2, resize(t, newBinNode)); | ||
876 | } | ||
877 | } | 809 | } |
878 | tnode_free(oldtnode); | 810 | tnode_free(oldtnode); |
879 | return tn; | 811 | return tn; |
812 | nomem: | ||
813 | { | ||
814 | int size = tnode_child_length(tn); | ||
815 | int j; | ||
816 | |||
817 | for (j = 0; j < size; j++) | ||
818 | if (tn->child[j]) | ||
819 | tnode_free((struct tnode *)tn->child[j]); | ||
820 | |||
821 | tnode_free(tn); | ||
822 | |||
823 | return ERR_PTR(-ENOMEM); | ||
824 | } | ||
880 | } | 825 | } |
881 | 826 | ||
882 | static void *trie_init(struct trie *t) | 827 | static void trie_init(struct trie *t) |
883 | { | 828 | { |
884 | if (t) { | 829 | if (!t) |
885 | t->size = 0; | 830 | return; |
886 | t->trie = NULL; | 831 | |
887 | t->revision = 0; | 832 | t->size = 0; |
833 | rcu_assign_pointer(t->trie, NULL); | ||
834 | t->revision = 0; | ||
888 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 835 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
889 | memset(&t->stats, 0, sizeof(struct trie_use_stats)); | 836 | memset(&t->stats, 0, sizeof(struct trie_use_stats)); |
890 | #endif | 837 | #endif |
891 | } | ||
892 | return t; | ||
893 | } | 838 | } |
894 | 839 | ||
840 | /* readside must use rcu_read_lock; currently dump routines | |
841 | via get_fa_head and dump */ | |
842 | |||
895 | static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) | 843 | static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) |
896 | { | 844 | { |
897 | struct hlist_node *node; | 845 | struct hlist_node *node; |
898 | struct leaf_info *li; | 846 | struct leaf_info *li; |
899 | 847 | ||
900 | hlist_for_each_entry(li, node, head, hlist) { | 848 | hlist_for_each_entry_rcu(li, node, head, hlist) |
901 | if (li->plen == plen) | 849 | if (li->plen == plen) |
902 | return li; | 850 | return li; |
903 | } | 851 | |
904 | return NULL; | 852 | return NULL; |
905 | } | 853 | } |
906 | 854 | ||
907 | static inline struct list_head * get_fa_head(struct leaf *l, int plen) | 855 | static inline struct list_head * get_fa_head(struct leaf *l, int plen) |
908 | { | 856 | { |
909 | struct list_head *fa_head = NULL; | ||
910 | struct leaf_info *li = find_leaf_info(&l->list, plen); | 857 | struct leaf_info *li = find_leaf_info(&l->list, plen); |
911 | 858 | ||
912 | if (li) | 859 | if (!li) |
913 | fa_head = &li->falh; | 860 | return NULL; |
914 | 861 | ||
915 | return fa_head; | 862 | return &li->falh; |
916 | } | 863 | } |
917 | 864 | ||
918 | static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) | 865 | static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) |
919 | { | 866 | { |
920 | struct leaf_info *li = NULL, *last = NULL; | 867 | struct leaf_info *li = NULL, *last = NULL; |
921 | struct hlist_node *node, *tmp; | 868 | struct hlist_node *node; |
922 | 869 | ||
923 | write_lock_bh(&fib_lock); | 870 | if (hlist_empty(head)) { |
924 | 871 | hlist_add_head_rcu(&new->hlist, head); | |
925 | if (hlist_empty(head)) | 872 | } else { |
926 | hlist_add_head(&new->hlist, head); | 873 | hlist_for_each_entry(li, node, head, hlist) { |
927 | else { | 874 | if (new->plen > li->plen) |
928 | hlist_for_each_entry_safe(li, node, tmp, head, hlist) { | 875 | break; |
929 | 876 | ||
930 | if (new->plen > li->plen) | 877 | last = li; |
931 | break; | 878 | } |
932 | 879 | if (last) | |
933 | last = li; | 880 | hlist_add_after_rcu(&last->hlist, &new->hlist); |
934 | } | 881 | else |
935 | if (last) | 882 | hlist_add_before_rcu(&new->hlist, &li->hlist); |
936 | hlist_add_after(&last->hlist, &new->hlist); | 883 | } |
937 | else | ||
938 | hlist_add_before(&new->hlist, &li->hlist); | ||
939 | } | ||
940 | write_unlock_bh(&fib_lock); | ||
941 | } | 884 | } |
942 | 885 | ||
886 | /* rcu_read_lock needs to be held by caller from readside */ | |
887 | |||
943 | static struct leaf * | 888 | static struct leaf * |
944 | fib_find_node(struct trie *t, u32 key) | 889 | fib_find_node(struct trie *t, u32 key) |
945 | { | 890 | { |
@@ -948,61 +893,43 @@ fib_find_node(struct trie *t, u32 key) | |||
948 | struct node *n; | 893 | struct node *n; |
949 | 894 | ||
950 | pos = 0; | 895 | pos = 0; |
951 | n = t->trie; | 896 | n = rcu_dereference(t->trie); |
952 | 897 | ||
953 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 898 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
954 | tn = (struct tnode *) n; | 899 | tn = (struct tnode *) n; |
955 | 900 | ||
956 | check_tnode(tn); | 901 | check_tnode(tn); |
957 | 902 | ||
958 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { | 903 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { |
959 | pos=tn->pos + tn->bits; | 904 | pos = tn->pos + tn->bits; |
960 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); | 905 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); |
961 | } | 906 | } else |
962 | else | ||
963 | break; | 907 | break; |
964 | } | 908 | } |
965 | /* Case we have found a leaf. Compare prefixes */ | 909 | /* Case we have found a leaf. Compare prefixes */ |
966 | 910 | ||
967 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { | 911 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) |
968 | struct leaf *l = (struct leaf *) n; | 912 | return (struct leaf *)n; |
969 | return l; | 913 | |
970 | } | ||
971 | return NULL; | 914 | return NULL; |
972 | } | 915 | } |
973 | 916 | ||
974 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | 917 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) |
975 | { | 918 | { |
976 | int i = 0; | ||
977 | int wasfull; | 919 | int wasfull; |
978 | t_key cindex, key; | 920 | t_key cindex, key; |
979 | struct tnode *tp = NULL; | 921 | struct tnode *tp = NULL; |
980 | 922 | ||
981 | if (!tn) | ||
982 | BUG(); | ||
983 | |||
984 | key = tn->key; | 923 | key = tn->key; |
985 | i = 0; | ||
986 | 924 | ||
987 | while (tn != NULL && NODE_PARENT(tn) != NULL) { | 925 | while (tn != NULL && NODE_PARENT(tn) != NULL) { |
988 | 926 | ||
989 | if (i > 10) { | ||
990 | printk("Rebalance tn=%p \n", tn); | ||
991 | if (tn) printk("tn->parent=%p \n", NODE_PARENT(tn)); | ||
992 | |||
993 | printk("Rebalance tp=%p \n", tp); | ||
994 | if (tp) printk("tp->parent=%p \n", NODE_PARENT(tp)); | ||
995 | } | ||
996 | |||
997 | if (i > 12) BUG(); | ||
998 | i++; | ||
999 | |||
1000 | tp = NODE_PARENT(tn); | 927 | tp = NODE_PARENT(tn); |
1001 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 928 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1002 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 929 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
1003 | tn = (struct tnode *) resize (t, (struct tnode *)tn); | 930 | tn = (struct tnode *) resize (t, (struct tnode *)tn); |
1004 | tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); | 931 | tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); |
1005 | 932 | ||
1006 | if (!NODE_PARENT(tn)) | 933 | if (!NODE_PARENT(tn)) |
1007 | break; | 934 | break; |
1008 | 935 | ||
@@ -1015,6 +942,8 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | |||
1015 | return (struct node*) tn; | 942 | return (struct node*) tn; |
1016 | } | 943 | } |
1017 | 944 | ||
945 | /* only used from updater-side */ | ||
946 | |||
1018 | static struct list_head * | 947 | static struct list_head * |
1019 | fib_insert_node(struct trie *t, int *err, u32 key, int plen) | 948 | fib_insert_node(struct trie *t, int *err, u32 key, int plen) |
1020 | { | 949 | { |
@@ -1050,20 +979,16 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1050 | 979 | ||
1051 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 980 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
1052 | tn = (struct tnode *) n; | 981 | tn = (struct tnode *) n; |
1053 | 982 | ||
1054 | check_tnode(tn); | 983 | check_tnode(tn); |
1055 | 984 | ||
1056 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { | 985 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { |
1057 | tp = tn; | 986 | tp = tn; |
1058 | pos=tn->pos + tn->bits; | 987 | pos = tn->pos + tn->bits; |
1059 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); | 988 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); |
1060 | 989 | ||
1061 | if (n && NODE_PARENT(n) != tn) { | 990 | BUG_ON(n && NODE_PARENT(n) != tn); |
1062 | printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); | 991 | } else |
1063 | BUG(); | ||
1064 | } | ||
1065 | } | ||
1066 | else | ||
1067 | break; | 992 | break; |
1068 | } | 993 | } |
1069 | 994 | ||
@@ -1073,17 +998,15 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1073 | * tp is n's (parent) ----> NULL or TNODE | 998 | * tp is n's (parent) ----> NULL or TNODE |
1074 | */ | 999 | */ |
1075 | 1000 | ||
1076 | if (tp && IS_LEAF(tp)) | 1001 | BUG_ON(tp && IS_LEAF(tp)); |
1077 | BUG(); | ||
1078 | |||
1079 | 1002 | ||
1080 | /* Case 1: n is a leaf. Compare prefixes */ | 1003 | /* Case 1: n is a leaf. Compare prefixes */ |
1081 | 1004 | ||
1082 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { | 1005 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { |
1083 | struct leaf *l = ( struct leaf *) n; | 1006 | struct leaf *l = (struct leaf *) n; |
1084 | 1007 | ||
1085 | li = leaf_info_new(plen); | 1008 | li = leaf_info_new(plen); |
1086 | 1009 | ||
1087 | if (!li) { | 1010 | if (!li) { |
1088 | *err = -ENOMEM; | 1011 | *err = -ENOMEM; |
1089 | goto err; | 1012 | goto err; |
@@ -1113,35 +1036,29 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1113 | fa_head = &li->falh; | 1036 | fa_head = &li->falh; |
1114 | insert_leaf_info(&l->list, li); | 1037 | insert_leaf_info(&l->list, li); |
1115 | 1038 | ||
1116 | /* Case 2: n is NULL, and will just insert a new leaf */ | ||
1117 | if (t->trie && n == NULL) { | 1039 | if (t->trie && n == NULL) { |
1040 | /* Case 2: n is NULL, and will just insert a new leaf */ | ||
1118 | 1041 | ||
1119 | NODE_SET_PARENT(l, tp); | 1042 | NODE_SET_PARENT(l, tp); |
1120 | |||
1121 | if (!tp) | ||
1122 | BUG(); | ||
1123 | 1043 | ||
1124 | else { | 1044 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1125 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1045 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); |
1126 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); | 1046 | } else { |
1127 | } | 1047 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
1128 | } | ||
1129 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | ||
1130 | else { | ||
1131 | /* | 1048 | /* |
1132 | * Add a new tnode here | 1049 | * Add a new tnode here |
1133 | * first tnode need some special handling | 1050 | * first tnode need some special handling |
1134 | */ | 1051 | */ |
1135 | 1052 | ||
1136 | if (tp) | 1053 | if (tp) |
1137 | pos=tp->pos+tp->bits; | 1054 | pos = tp->pos+tp->bits; |
1138 | else | 1055 | else |
1139 | pos=0; | 1056 | pos = 0; |
1057 | |||
1140 | if (n) { | 1058 | if (n) { |
1141 | newpos = tkey_mismatch(key, pos, n->key); | 1059 | newpos = tkey_mismatch(key, pos, n->key); |
1142 | tn = tnode_new(n->key, newpos, 1); | 1060 | tn = tnode_new(n->key, newpos, 1); |
1143 | } | 1061 | } else { |
1144 | else { | ||
1145 | newpos = 0; | 1062 | newpos = 0; |
1146 | tn = tnode_new(key, newpos, 1); /* First tnode */ | 1063 | tn = tnode_new(key, newpos, 1); /* First tnode */ |
1147 | } | 1064 | } |
@@ -1151,32 +1068,33 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1151 | tnode_free((struct tnode *) l); | 1068 | tnode_free((struct tnode *) l); |
1152 | *err = -ENOMEM; | 1069 | *err = -ENOMEM; |
1153 | goto err; | 1070 | goto err; |
1154 | } | 1071 | } |
1155 | 1072 | ||
1156 | NODE_SET_PARENT(tn, tp); | 1073 | NODE_SET_PARENT(tn, tp); |
1157 | 1074 | ||
1158 | missbit=tkey_extract_bits(key, newpos, 1); | 1075 | missbit = tkey_extract_bits(key, newpos, 1); |
1159 | put_child(t, tn, missbit, (struct node *)l); | 1076 | put_child(t, tn, missbit, (struct node *)l); |
1160 | put_child(t, tn, 1-missbit, n); | 1077 | put_child(t, tn, 1-missbit, n); |
1161 | 1078 | ||
1162 | if (tp) { | 1079 | if (tp) { |
1163 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1080 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1164 | put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); | 1081 | put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); |
1165 | } | 1082 | } else { |
1166 | else { | 1083 | rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ |
1167 | t->trie = (struct node*) tn; /* First tnode */ | ||
1168 | tp = tn; | 1084 | tp = tn; |
1169 | } | 1085 | } |
1170 | } | 1086 | } |
1171 | if (tp && tp->pos+tp->bits > 32) { | 1087 | |
1088 | if (tp && tp->pos + tp->bits > 32) | ||
1172 | printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", | 1089 | printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", |
1173 | tp, tp->pos, tp->bits, key, plen); | 1090 | tp, tp->pos, tp->bits, key, plen); |
1174 | } | 1091 | |
1175 | /* Rebalance the trie */ | 1092 | /* Rebalance the trie */ |
1176 | t->trie = trie_rebalance(t, tp); | 1093 | |
1094 | rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); | ||
1177 | done: | 1095 | done: |
1178 | t->revision++; | 1096 | t->revision++; |
1179 | err:; | 1097 | err: |
1180 | return fa_head; | 1098 | return fa_head; |
1181 | } | 1099 | } |
1182 | 1100 | ||
@@ -1204,17 +1122,18 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1204 | 1122 | ||
1205 | key = ntohl(key); | 1123 | key = ntohl(key); |
1206 | 1124 | ||
1207 | if (trie_debug) | 1125 | pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); |
1208 | printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); | ||
1209 | 1126 | ||
1210 | mask = ntohl( inet_make_mask(plen) ); | 1127 | mask = ntohl(inet_make_mask(plen)); |
1211 | 1128 | ||
1212 | if (key & ~mask) | 1129 | if (key & ~mask) |
1213 | return -EINVAL; | 1130 | return -EINVAL; |
1214 | 1131 | ||
1215 | key = key & mask; | 1132 | key = key & mask; |
1216 | 1133 | ||
1217 | if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL) | 1134 | fi = fib_create_info(r, rta, nlhdr, &err); |
1135 | |||
1136 | if (!fi) | ||
1218 | goto err; | 1137 | goto err; |
1219 | 1138 | ||
1220 | l = fib_find_node(t, key); | 1139 | l = fib_find_node(t, key); |
@@ -1236,8 +1155,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1236 | * and we need to allocate a new one of those as well. | 1155 | * and we need to allocate a new one of those as well. |
1237 | */ | 1156 | */ |
1238 | 1157 | ||
1239 | if (fa && | 1158 | if (fa && fa->fa_info->fib_priority == fi->fib_priority) { |
1240 | fa->fa_info->fib_priority == fi->fib_priority) { | ||
1241 | struct fib_alias *fa_orig; | 1159 | struct fib_alias *fa_orig; |
1242 | 1160 | ||
1243 | err = -EEXIST; | 1161 | err = -EEXIST; |
@@ -1248,22 +1166,27 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1248 | struct fib_info *fi_drop; | 1166 | struct fib_info *fi_drop; |
1249 | u8 state; | 1167 | u8 state; |
1250 | 1168 | ||
1251 | write_lock_bh(&fib_lock); | 1169 | err = -ENOBUFS; |
1170 | new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); | ||
1171 | if (new_fa == NULL) | ||
1172 | goto out; | ||
1252 | 1173 | ||
1253 | fi_drop = fa->fa_info; | 1174 | fi_drop = fa->fa_info; |
1254 | fa->fa_info = fi; | 1175 | new_fa->fa_tos = fa->fa_tos; |
1255 | fa->fa_type = type; | 1176 | new_fa->fa_info = fi; |
1256 | fa->fa_scope = r->rtm_scope; | 1177 | new_fa->fa_type = type; |
1178 | new_fa->fa_scope = r->rtm_scope; | ||
1257 | state = fa->fa_state; | 1179 | state = fa->fa_state; |
1258 | fa->fa_state &= ~FA_S_ACCESSED; | 1180 | new_fa->fa_state &= ~FA_S_ACCESSED; |
1259 | 1181 | ||
1260 | write_unlock_bh(&fib_lock); | 1182 | list_replace_rcu(&fa->fa_list, &new_fa->fa_list); |
1183 | alias_free_mem_rcu(fa); | ||
1261 | 1184 | ||
1262 | fib_release_info(fi_drop); | 1185 | fib_release_info(fi_drop); |
1263 | if (state & FA_S_ACCESSED) | 1186 | if (state & FA_S_ACCESSED) |
1264 | rt_cache_flush(-1); | 1187 | rt_cache_flush(-1); |
1265 | 1188 | ||
1266 | goto succeeded; | 1189 | goto succeeded; |
1267 | } | 1190 | } |
1268 | /* Error if we find a perfect match which | 1191 | /* Error if we find a perfect match which |
1269 | * uses the same scope, type, and nexthop | 1192 | * uses the same scope, type, and nexthop |
@@ -1285,7 +1208,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1285 | fa = fa_orig; | 1208 | fa = fa_orig; |
1286 | } | 1209 | } |
1287 | err = -ENOENT; | 1210 | err = -ENOENT; |
1288 | if (!(nlhdr->nlmsg_flags&NLM_F_CREATE)) | 1211 | if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) |
1289 | goto out; | 1212 | goto out; |
1290 | 1213 | ||
1291 | err = -ENOBUFS; | 1214 | err = -ENOBUFS; |
@@ -1298,9 +1221,6 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1298 | new_fa->fa_type = type; | 1221 | new_fa->fa_type = type; |
1299 | new_fa->fa_scope = r->rtm_scope; | 1222 | new_fa->fa_scope = r->rtm_scope; |
1300 | new_fa->fa_state = 0; | 1223 | new_fa->fa_state = 0; |
1301 | #if 0 | ||
1302 | new_fa->dst = NULL; | ||
1303 | #endif | ||
1304 | /* | 1224 | /* |
1305 | * Insert new entry to the list. | 1225 | * Insert new entry to the list. |
1306 | */ | 1226 | */ |
@@ -1312,12 +1232,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1312 | goto out_free_new_fa; | 1232 | goto out_free_new_fa; |
1313 | } | 1233 | } |
1314 | 1234 | ||
1315 | write_lock_bh(&fib_lock); | 1235 | list_add_tail_rcu(&new_fa->fa_list, |
1316 | 1236 | (fa ? &fa->fa_list : fa_head)); | |
1317 | list_add_tail(&new_fa->fa_list, | ||
1318 | (fa ? &fa->fa_list : fa_head)); | ||
1319 | |||
1320 | write_unlock_bh(&fib_lock); | ||
1321 | 1237 | ||
1322 | rt_cache_flush(-1); | 1238 | rt_cache_flush(-1); |
1323 | rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); | 1239 | rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); |
@@ -1328,38 +1244,40 @@ out_free_new_fa: | |||
1328 | kmem_cache_free(fn_alias_kmem, new_fa); | 1244 | kmem_cache_free(fn_alias_kmem, new_fa); |
1329 | out: | 1245 | out: |
1330 | fib_release_info(fi); | 1246 | fib_release_info(fi); |
1331 | err:; | 1247 | err: |
1332 | return err; | 1248 | return err; |
1333 | } | 1249 | } |
1334 | 1250 | ||
1335 | static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, | 1251 | |
1336 | struct fib_result *res, int *err) | 1252 | /* should be called with rcu_read_lock */ |
1253 | static inline int check_leaf(struct trie *t, struct leaf *l, | ||
1254 | t_key key, int *plen, const struct flowi *flp, | ||
1255 | struct fib_result *res) | ||
1337 | { | 1256 | { |
1338 | int i; | 1257 | int err, i; |
1339 | t_key mask; | 1258 | t_key mask; |
1340 | struct leaf_info *li; | 1259 | struct leaf_info *li; |
1341 | struct hlist_head *hhead = &l->list; | 1260 | struct hlist_head *hhead = &l->list; |
1342 | struct hlist_node *node; | 1261 | struct hlist_node *node; |
1343 | 1262 | ||
1344 | hlist_for_each_entry(li, node, hhead, hlist) { | 1263 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { |
1345 | |||
1346 | i = li->plen; | 1264 | i = li->plen; |
1347 | mask = ntohl(inet_make_mask(i)); | 1265 | mask = ntohl(inet_make_mask(i)); |
1348 | if (l->key != (key & mask)) | 1266 | if (l->key != (key & mask)) |
1349 | continue; | 1267 | continue; |
1350 | 1268 | ||
1351 | if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { | 1269 | if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) { |
1352 | *plen = i; | 1270 | *plen = i; |
1353 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1271 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1354 | t->stats.semantic_match_passed++; | 1272 | t->stats.semantic_match_passed++; |
1355 | #endif | 1273 | #endif |
1356 | return 1; | 1274 | return err; |
1357 | } | 1275 | } |
1358 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1276 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1359 | t->stats.semantic_match_miss++; | 1277 | t->stats.semantic_match_miss++; |
1360 | #endif | 1278 | #endif |
1361 | } | 1279 | } |
1362 | return 0; | 1280 | return 1; |
1363 | } | 1281 | } |
1364 | 1282 | ||
1365 | static int | 1283 | static int |
@@ -1370,13 +1288,17 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result | |||
1370 | struct node *n; | 1288 | struct node *n; |
1371 | struct tnode *pn; | 1289 | struct tnode *pn; |
1372 | int pos, bits; | 1290 | int pos, bits; |
1373 | t_key key=ntohl(flp->fl4_dst); | 1291 | t_key key = ntohl(flp->fl4_dst); |
1374 | int chopped_off; | 1292 | int chopped_off; |
1375 | t_key cindex = 0; | 1293 | t_key cindex = 0; |
1376 | int current_prefix_length = KEYLENGTH; | 1294 | int current_prefix_length = KEYLENGTH; |
1377 | n = t->trie; | 1295 | struct tnode *cn; |
1296 | t_key node_prefix, key_prefix, pref_mismatch; | ||
1297 | int mp; | ||
1298 | |||
1299 | rcu_read_lock(); | ||
1378 | 1300 | ||
1379 | read_lock(&fib_lock); | 1301 | n = rcu_dereference(t->trie); |
1380 | if (!n) | 1302 | if (!n) |
1381 | goto failed; | 1303 | goto failed; |
1382 | 1304 | ||
@@ -1386,15 +1308,14 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result | |||
1386 | 1308 | ||
1387 | /* Just a leaf? */ | 1309 | /* Just a leaf? */ |
1388 | if (IS_LEAF(n)) { | 1310 | if (IS_LEAF(n)) { |
1389 | if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) | 1311 | if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) |
1390 | goto found; | 1312 | goto found; |
1391 | goto failed; | 1313 | goto failed; |
1392 | } | 1314 | } |
1393 | pn = (struct tnode *) n; | 1315 | pn = (struct tnode *) n; |
1394 | chopped_off = 0; | 1316 | chopped_off = 0; |
1395 | 1317 | ||
1396 | while (pn) { | 1318 | while (pn) { |
1397 | |||
1398 | pos = pn->pos; | 1319 | pos = pn->pos; |
1399 | bits = pn->bits; | 1320 | bits = pn->bits; |
1400 | 1321 | ||
@@ -1410,130 +1331,129 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result | |||
1410 | goto backtrace; | 1331 | goto backtrace; |
1411 | } | 1332 | } |
1412 | 1333 | ||
1413 | if (IS_TNODE(n)) { | 1334 | if (IS_LEAF(n)) { |
1335 | if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) | ||
1336 | goto found; | ||
1337 | else | ||
1338 | goto backtrace; | ||
1339 | } | ||
1340 | |||
1414 | #define HL_OPTIMIZE | 1341 | #define HL_OPTIMIZE |
1415 | #ifdef HL_OPTIMIZE | 1342 | #ifdef HL_OPTIMIZE |
1416 | struct tnode *cn = (struct tnode *)n; | 1343 | cn = (struct tnode *)n; |
1417 | t_key node_prefix, key_prefix, pref_mismatch; | ||
1418 | int mp; | ||
1419 | 1344 | ||
1420 | /* | 1345 | /* |
1421 | * It's a tnode, and we can do some extra checks here if we | 1346 | * It's a tnode, and we can do some extra checks here if we |
1422 | * like, to avoid descending into a dead-end branch. | 1347 | * like, to avoid descending into a dead-end branch. |
1423 | * This tnode is in the parent's child array at index | 1348 | * This tnode is in the parent's child array at index |
1424 | * key[p_pos..p_pos+p_bits] but potentially with some bits | 1349 | * key[p_pos..p_pos+p_bits] but potentially with some bits |
1425 | * chopped off, so in reality the index may be just a | 1350 | * chopped off, so in reality the index may be just a |
1426 | * subprefix, padded with zero at the end. | 1351 | * subprefix, padded with zero at the end. |
1427 | * We can also take a look at any skipped bits in this | 1352 | * We can also take a look at any skipped bits in this |
1428 | * tnode - everything up to p_pos is supposed to be ok, | 1353 | * tnode - everything up to p_pos is supposed to be ok, |
1429 | * and the non-chopped bits of the index (se previous | 1354 | * and the non-chopped bits of the index (se previous |
1430 | * paragraph) are also guaranteed ok, but the rest is | 1355 | * paragraph) are also guaranteed ok, but the rest is |
1431 | * considered unknown. | 1356 | * considered unknown. |
1432 | * | 1357 | * |
1433 | * The skipped bits are key[pos+bits..cn->pos]. | 1358 | * The skipped bits are key[pos+bits..cn->pos]. |
1434 | */ | 1359 | */ |
1435 | |||
1436 | /* If current_prefix_length < pos+bits, we are already doing | ||
1437 | * actual prefix matching, which means everything from | ||
1438 | * pos+(bits-chopped_off) onward must be zero along some | ||
1439 | * branch of this subtree - otherwise there is *no* valid | ||
1440 | * prefix present. Here we can only check the skipped | ||
1441 | * bits. Remember, since we have already indexed into the | ||
1442 | * parent's child array, we know that the bits we chopped of | ||
1443 | * *are* zero. | ||
1444 | */ | ||
1445 | 1360 | ||
1446 | /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ | 1361 | /* If current_prefix_length < pos+bits, we are already doing |
1447 | 1362 | * actual prefix matching, which means everything from | |
1448 | if (current_prefix_length < pos+bits) { | 1363 | * pos+(bits-chopped_off) onward must be zero along some |
1449 | if (tkey_extract_bits(cn->key, current_prefix_length, | 1364 | * branch of this subtree - otherwise there is *no* valid |
1450 | cn->pos - current_prefix_length) != 0 || | 1365 | * prefix present. Here we can only check the skipped |
1451 | !(cn->child[0])) | 1366 | * bits. Remember, since we have already indexed into the |
1452 | goto backtrace; | 1367 | * parent's child array, we know that the bits we chopped of |
1453 | } | 1368 | * *are* zero. |
1369 | */ | ||
1454 | 1370 | ||
1455 | /* | 1371 | /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ |
1456 | * If chopped_off=0, the index is fully validated and we | ||
1457 | * only need to look at the skipped bits for this, the new, | ||
1458 | * tnode. What we actually want to do is to find out if | ||
1459 | * these skipped bits match our key perfectly, or if we will | ||
1460 | * have to count on finding a matching prefix further down, | ||
1461 | * because if we do, we would like to have some way of | ||
1462 | * verifying the existence of such a prefix at this point. | ||
1463 | */ | ||
1464 | 1372 | ||
1465 | /* The only thing we can do at this point is to verify that | 1373 | if (current_prefix_length < pos+bits) { |
1466 | * any such matching prefix can indeed be a prefix to our | 1374 | if (tkey_extract_bits(cn->key, current_prefix_length, |
1467 | * key, and if the bits in the node we are inspecting that | 1375 | cn->pos - current_prefix_length) != 0 || |
1468 | * do not match our key are not ZERO, this cannot be true. | 1376 | !(cn->child[0])) |
1469 | * Thus, find out where there is a mismatch (before cn->pos) | 1377 | goto backtrace; |
1470 | * and verify that all the mismatching bits are zero in the | 1378 | } |
1471 | * new tnode's key. | ||
1472 | */ | ||
1473 | 1379 | ||
1474 | /* Note: We aren't very concerned about the piece of the key | 1380 | /* |
1475 | * that precede pn->pos+pn->bits, since these have already been | 1381 | * If chopped_off=0, the index is fully validated and we |
1476 | * checked. The bits after cn->pos aren't checked since these are | 1382 | * only need to look at the skipped bits for this, the new, |
1477 | * by definition "unknown" at this point. Thus, what we want to | 1383 | * tnode. What we actually want to do is to find out if |
1478 | * see is if we are about to enter the "prefix matching" state, | 1384 | * these skipped bits match our key perfectly, or if we will |
1479 | * and in that case verify that the skipped bits that will prevail | 1385 | * have to count on finding a matching prefix further down, |
1480 | * throughout this subtree are zero, as they have to be if we are | 1386 | * because if we do, we would like to have some way of |
1481 | * to find a matching prefix. | 1387 | * verifying the existence of such a prefix at this point. |
1482 | */ | 1388 | */ |
1483 | 1389 | ||
1484 | node_prefix = MASK_PFX(cn->key, cn->pos); | 1390 | /* The only thing we can do at this point is to verify that |
1485 | key_prefix = MASK_PFX(key, cn->pos); | 1391 | * any such matching prefix can indeed be a prefix to our |
1486 | pref_mismatch = key_prefix^node_prefix; | 1392 | * key, and if the bits in the node we are inspecting that |
1487 | mp = 0; | 1393 | * do not match our key are not ZERO, this cannot be true. |
1394 | * Thus, find out where there is a mismatch (before cn->pos) | ||
1395 | * and verify that all the mismatching bits are zero in the | ||
1396 | * new tnode's key. | ||
1397 | */ | ||
1488 | 1398 | ||
1489 | /* In short: If skipped bits in this node do not match the search | 1399 | /* Note: We aren't very concerned about the piece of the key |
1490 | * key, enter the "prefix matching" state.directly. | 1400 | * that precede pn->pos+pn->bits, since these have already been |
1491 | */ | 1401 | * checked. The bits after cn->pos aren't checked since these are |
1492 | if (pref_mismatch) { | 1402 | * by definition "unknown" at this point. Thus, what we want to |
1493 | while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { | 1403 | * see is if we are about to enter the "prefix matching" state, |
1494 | mp++; | 1404 | * and in that case verify that the skipped bits that will prevail |
1495 | pref_mismatch = pref_mismatch <<1; | 1405 | * throughout this subtree are zero, as they have to be if we are |
1496 | } | 1406 | * to find a matching prefix. |
1497 | key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); | 1407 | */ |
1498 | 1408 | ||
1499 | if (key_prefix != 0) | 1409 | node_prefix = MASK_PFX(cn->key, cn->pos); |
1500 | goto backtrace; | 1410 | key_prefix = MASK_PFX(key, cn->pos); |
1501 | 1411 | pref_mismatch = key_prefix^node_prefix; | |
1502 | if (current_prefix_length >= cn->pos) | 1412 | mp = 0; |
1503 | current_prefix_length=mp; | 1413 | |
1504 | } | 1414 | /* In short: If skipped bits in this node do not match the search |
1505 | #endif | 1415 | * key, enter the "prefix matching" state.directly. |
1506 | pn = (struct tnode *)n; /* Descend */ | 1416 | */ |
1507 | chopped_off = 0; | 1417 | if (pref_mismatch) { |
1508 | continue; | 1418 | while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { |
1419 | mp++; | ||
1420 | pref_mismatch = pref_mismatch <<1; | ||
1421 | } | ||
1422 | key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); | ||
1423 | |||
1424 | if (key_prefix != 0) | ||
1425 | goto backtrace; | ||
1426 | |||
1427 | if (current_prefix_length >= cn->pos) | ||
1428 | current_prefix_length = mp; | ||
1509 | } | 1429 | } |
1510 | if (IS_LEAF(n)) { | 1430 | #endif |
1511 | if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) | 1431 | pn = (struct tnode *)n; /* Descend */ |
1512 | goto found; | 1432 | chopped_off = 0; |
1513 | } | 1433 | continue; |
1434 | |||
1514 | backtrace: | 1435 | backtrace: |
1515 | chopped_off++; | 1436 | chopped_off++; |
1516 | 1437 | ||
1517 | /* As zero don't change the child key (cindex) */ | 1438 | /* As zero don't change the child key (cindex) */ |
1518 | while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) { | 1439 | while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) |
1519 | chopped_off++; | 1440 | chopped_off++; |
1520 | } | ||
1521 | 1441 | ||
1522 | /* Decrease current_... with bits chopped off */ | 1442 | /* Decrease current_... with bits chopped off */ |
1523 | if (current_prefix_length > pn->pos + pn->bits - chopped_off) | 1443 | if (current_prefix_length > pn->pos + pn->bits - chopped_off) |
1524 | current_prefix_length = pn->pos + pn->bits - chopped_off; | 1444 | current_prefix_length = pn->pos + pn->bits - chopped_off; |
1525 | 1445 | ||
1526 | /* | 1446 | /* |
1527 | * Either we do the actual chop off according or if we have | 1447 | * Either we do the actual chop off according or if we have |
1528 | * chopped off all bits in this tnode walk up to our parent. | 1448 | * chopped off all bits in this tnode walk up to our parent. |
1529 | */ | 1449 | */ |
1530 | 1450 | ||
1531 | if (chopped_off <= pn->bits) | 1451 | if (chopped_off <= pn->bits) { |
1532 | cindex &= ~(1 << (chopped_off-1)); | 1452 | cindex &= ~(1 << (chopped_off-1)); |
1533 | else { | 1453 | } else { |
1534 | if (NODE_PARENT(pn) == NULL) | 1454 | if (NODE_PARENT(pn) == NULL) |
1535 | goto failed; | 1455 | goto failed; |
1536 | 1456 | ||
1537 | /* Get Child's index */ | 1457 | /* Get Child's index */ |
1538 | cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); | 1458 | cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); |
1539 | pn = NODE_PARENT(pn); | 1459 | pn = NODE_PARENT(pn); |
@@ -1548,10 +1468,11 @@ backtrace: | |||
1548 | failed: | 1468 | failed: |
1549 | ret = 1; | 1469 | ret = 1; |
1550 | found: | 1470 | found: |
1551 | read_unlock(&fib_lock); | 1471 | rcu_read_unlock(); |
1552 | return ret; | 1472 | return ret; |
1553 | } | 1473 | } |
1554 | 1474 | ||
1475 | /* only called from updater side */ | ||
1555 | static int trie_leaf_remove(struct trie *t, t_key key) | 1476 | static int trie_leaf_remove(struct trie *t, t_key key) |
1556 | { | 1477 | { |
1557 | t_key cindex; | 1478 | t_key cindex; |
@@ -1559,24 +1480,20 @@ static int trie_leaf_remove(struct trie *t, t_key key) | |||
1559 | struct node *n = t->trie; | 1480 | struct node *n = t->trie; |
1560 | struct leaf *l; | 1481 | struct leaf *l; |
1561 | 1482 | ||
1562 | if (trie_debug) | 1483 | pr_debug("entering trie_leaf_remove(%p)\n", n); |
1563 | printk("entering trie_leaf_remove(%p)\n", n); | ||
1564 | 1484 | ||
1565 | /* Note that in the case skipped bits, those bits are *not* checked! | 1485 | /* Note that in the case skipped bits, those bits are *not* checked! |
1566 | * When we finish this, we will have NULL or a T_LEAF, and the | 1486 | * When we finish this, we will have NULL or a T_LEAF, and the |
1567 | * T_LEAF may or may not match our key. | 1487 | * T_LEAF may or may not match our key. |
1568 | */ | 1488 | */ |
1569 | 1489 | ||
1570 | while (n != NULL && IS_TNODE(n)) { | 1490 | while (n != NULL && IS_TNODE(n)) { |
1571 | struct tnode *tn = (struct tnode *) n; | 1491 | struct tnode *tn = (struct tnode *) n; |
1572 | check_tnode(tn); | 1492 | check_tnode(tn); |
1573 | n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); | 1493 | n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); |
1574 | 1494 | ||
1575 | if (n && NODE_PARENT(n) != tn) { | 1495 | BUG_ON(n && NODE_PARENT(n) != tn); |
1576 | printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); | 1496 | } |
1577 | BUG(); | ||
1578 | } | ||
1579 | } | ||
1580 | l = (struct leaf *) n; | 1497 | l = (struct leaf *) n; |
1581 | 1498 | ||
1582 | if (!n || !tkey_equals(l->key, key)) | 1499 | if (!n || !tkey_equals(l->key, key)) |
@@ -1590,23 +1507,24 @@ static int trie_leaf_remove(struct trie *t, t_key key) | |||
1590 | t->revision++; | 1507 | t->revision++; |
1591 | t->size--; | 1508 | t->size--; |
1592 | 1509 | ||
1510 | preempt_disable(); | ||
1593 | tp = NODE_PARENT(n); | 1511 | tp = NODE_PARENT(n); |
1594 | tnode_free((struct tnode *) n); | 1512 | tnode_free((struct tnode *) n); |
1595 | 1513 | ||
1596 | if (tp) { | 1514 | if (tp) { |
1597 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1515 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1598 | put_child(t, (struct tnode *)tp, cindex, NULL); | 1516 | put_child(t, (struct tnode *)tp, cindex, NULL); |
1599 | t->trie = trie_rebalance(t, tp); | 1517 | rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); |
1600 | } | 1518 | } else |
1601 | else | 1519 | rcu_assign_pointer(t->trie, NULL); |
1602 | t->trie = NULL; | 1520 | preempt_enable(); |
1603 | 1521 | ||
1604 | return 1; | 1522 | return 1; |
1605 | } | 1523 | } |
1606 | 1524 | ||
1607 | static int | 1525 | static int |
1608 | fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | 1526 | fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, |
1609 | struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) | 1527 | struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) |
1610 | { | 1528 | { |
1611 | struct trie *t = (struct trie *) tb->tb_data; | 1529 | struct trie *t = (struct trie *) tb->tb_data; |
1612 | u32 key, mask; | 1530 | u32 key, mask; |
@@ -1615,6 +1533,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1615 | struct fib_alias *fa, *fa_to_delete; | 1533 | struct fib_alias *fa, *fa_to_delete; |
1616 | struct list_head *fa_head; | 1534 | struct list_head *fa_head; |
1617 | struct leaf *l; | 1535 | struct leaf *l; |
1536 | struct leaf_info *li; | ||
1537 | |||
1618 | 1538 | ||
1619 | if (plen > 32) | 1539 | if (plen > 32) |
1620 | return -EINVAL; | 1540 | return -EINVAL; |
@@ -1624,7 +1544,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1624 | memcpy(&key, rta->rta_dst, 4); | 1544 | memcpy(&key, rta->rta_dst, 4); |
1625 | 1545 | ||
1626 | key = ntohl(key); | 1546 | key = ntohl(key); |
1627 | mask = ntohl( inet_make_mask(plen) ); | 1547 | mask = ntohl(inet_make_mask(plen)); |
1628 | 1548 | ||
1629 | if (key & ~mask) | 1549 | if (key & ~mask) |
1630 | return -EINVAL; | 1550 | return -EINVAL; |
@@ -1641,11 +1561,11 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1641 | if (!fa) | 1561 | if (!fa) |
1642 | return -ESRCH; | 1562 | return -ESRCH; |
1643 | 1563 | ||
1644 | if (trie_debug) | 1564 | pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); |
1645 | printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); | ||
1646 | 1565 | ||
1647 | fa_to_delete = NULL; | 1566 | fa_to_delete = NULL; |
1648 | fa_head = fa->fa_list.prev; | 1567 | fa_head = fa->fa_list.prev; |
1568 | |||
1649 | list_for_each_entry(fa, fa_head, fa_list) { | 1569 | list_for_each_entry(fa, fa_head, fa_list) { |
1650 | struct fib_info *fi = fa->fa_info; | 1570 | struct fib_info *fi = fa->fa_info; |
1651 | 1571 | ||
@@ -1664,39 +1584,31 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1664 | } | 1584 | } |
1665 | } | 1585 | } |
1666 | 1586 | ||
1667 | if (fa_to_delete) { | 1587 | if (!fa_to_delete) |
1668 | int kill_li = 0; | 1588 | return -ESRCH; |
1669 | struct leaf_info *li; | ||
1670 | |||
1671 | fa = fa_to_delete; | ||
1672 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); | ||
1673 | 1589 | ||
1674 | l = fib_find_node(t, key); | 1590 | fa = fa_to_delete; |
1675 | li = find_leaf_info(&l->list, plen); | 1591 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); |
1676 | 1592 | ||
1677 | write_lock_bh(&fib_lock); | 1593 | l = fib_find_node(t, key); |
1594 | li = find_leaf_info(&l->list, plen); | ||
1678 | 1595 | ||
1679 | list_del(&fa->fa_list); | 1596 | list_del_rcu(&fa->fa_list); |
1680 | 1597 | ||
1681 | if (list_empty(fa_head)) { | 1598 | if (list_empty(fa_head)) { |
1682 | hlist_del(&li->hlist); | 1599 | hlist_del_rcu(&li->hlist); |
1683 | kill_li = 1; | 1600 | free_leaf_info(li); |
1684 | } | 1601 | } |
1685 | write_unlock_bh(&fib_lock); | ||
1686 | |||
1687 | if (kill_li) | ||
1688 | free_leaf_info(li); | ||
1689 | 1602 | ||
1690 | if (hlist_empty(&l->list)) | 1603 | if (hlist_empty(&l->list)) |
1691 | trie_leaf_remove(t, key); | 1604 | trie_leaf_remove(t, key); |
1692 | 1605 | ||
1693 | if (fa->fa_state & FA_S_ACCESSED) | 1606 | if (fa->fa_state & FA_S_ACCESSED) |
1694 | rt_cache_flush(-1); | 1607 | rt_cache_flush(-1); |
1695 | 1608 | ||
1696 | fn_free_alias(fa); | 1609 | fib_release_info(fa->fa_info); |
1697 | return 0; | 1610 | alias_free_mem_rcu(fa); |
1698 | } | 1611 | return 0; |
1699 | return -ESRCH; | ||
1700 | } | 1612 | } |
1701 | 1613 | ||
1702 | static int trie_flush_list(struct trie *t, struct list_head *head) | 1614 | static int trie_flush_list(struct trie *t, struct list_head *head) |
@@ -1706,14 +1618,11 @@ static int trie_flush_list(struct trie *t, struct list_head *head) | |||
1706 | 1618 | ||
1707 | list_for_each_entry_safe(fa, fa_node, head, fa_list) { | 1619 | list_for_each_entry_safe(fa, fa_node, head, fa_list) { |
1708 | struct fib_info *fi = fa->fa_info; | 1620 | struct fib_info *fi = fa->fa_info; |
1709 | |||
1710 | if (fi && (fi->fib_flags&RTNH_F_DEAD)) { | ||
1711 | |||
1712 | write_lock_bh(&fib_lock); | ||
1713 | list_del(&fa->fa_list); | ||
1714 | write_unlock_bh(&fib_lock); | ||
1715 | 1621 | ||
1716 | fn_free_alias(fa); | 1622 | if (fi && (fi->fib_flags & RTNH_F_DEAD)) { |
1623 | list_del_rcu(&fa->fa_list); | ||
1624 | fib_release_info(fa->fa_info); | ||
1625 | alias_free_mem_rcu(fa); | ||
1717 | found++; | 1626 | found++; |
1718 | } | 1627 | } |
1719 | } | 1628 | } |
@@ -1728,37 +1637,34 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l) | |||
1728 | struct leaf_info *li = NULL; | 1637 | struct leaf_info *li = NULL; |
1729 | 1638 | ||
1730 | hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { | 1639 | hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { |
1731 | |||
1732 | found += trie_flush_list(t, &li->falh); | 1640 | found += trie_flush_list(t, &li->falh); |
1733 | 1641 | ||
1734 | if (list_empty(&li->falh)) { | 1642 | if (list_empty(&li->falh)) { |
1735 | 1643 | hlist_del_rcu(&li->hlist); | |
1736 | write_lock_bh(&fib_lock); | ||
1737 | hlist_del(&li->hlist); | ||
1738 | write_unlock_bh(&fib_lock); | ||
1739 | |||
1740 | free_leaf_info(li); | 1644 | free_leaf_info(li); |
1741 | } | 1645 | } |
1742 | } | 1646 | } |
1743 | return found; | 1647 | return found; |
1744 | } | 1648 | } |
1745 | 1649 | ||
1650 | /* rcu_read_lock needs to be hold by caller from readside */ | ||
1651 | |||
1746 | static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) | 1652 | static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) |
1747 | { | 1653 | { |
1748 | struct node *c = (struct node *) thisleaf; | 1654 | struct node *c = (struct node *) thisleaf; |
1749 | struct tnode *p; | 1655 | struct tnode *p; |
1750 | int idx; | 1656 | int idx; |
1657 | struct node *trie = rcu_dereference(t->trie); | ||
1751 | 1658 | ||
1752 | if (c == NULL) { | 1659 | if (c == NULL) { |
1753 | if (t->trie == NULL) | 1660 | if (trie == NULL) |
1754 | return NULL; | 1661 | return NULL; |
1755 | 1662 | ||
1756 | if (IS_LEAF(t->trie)) /* trie w. just a leaf */ | 1663 | if (IS_LEAF(trie)) /* trie w. just a leaf */ |
1757 | return (struct leaf *) t->trie; | 1664 | return (struct leaf *) trie; |
1758 | 1665 | ||
1759 | p = (struct tnode*) t->trie; /* Start */ | 1666 | p = (struct tnode*) trie; /* Start */ |
1760 | } | 1667 | } else |
1761 | else | ||
1762 | p = (struct tnode *) NODE_PARENT(c); | 1668 | p = (struct tnode *) NODE_PARENT(c); |
1763 | 1669 | ||
1764 | while (p) { | 1670 | while (p) { |
@@ -1771,29 +1677,31 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) | |||
1771 | pos = 0; | 1677 | pos = 0; |
1772 | 1678 | ||
1773 | last = 1 << p->bits; | 1679 | last = 1 << p->bits; |
1774 | for(idx = pos; idx < last ; idx++) { | 1680 | for (idx = pos; idx < last ; idx++) { |
1775 | if (p->child[idx]) { | 1681 | c = rcu_dereference(p->child[idx]); |
1776 | 1682 | ||
1777 | /* Decend if tnode */ | 1683 | if (!c) |
1778 | 1684 | continue; | |
1779 | while (IS_TNODE(p->child[idx])) { | 1685 | |
1780 | p = (struct tnode*) p->child[idx]; | 1686 | /* Decend if tnode */ |
1781 | idx = 0; | 1687 | while (IS_TNODE(c)) { |
1782 | 1688 | p = (struct tnode *) c; | |
1783 | /* Rightmost non-NULL branch */ | 1689 | idx = 0; |
1784 | if (p && IS_TNODE(p)) | 1690 | |
1785 | while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++; | 1691 | /* Rightmost non-NULL branch */ |
1786 | 1692 | if (p && IS_TNODE(p)) | |
1787 | /* Done with this tnode? */ | 1693 | while (!(c = rcu_dereference(p->child[idx])) |
1788 | if (idx >= (1 << p->bits) || p->child[idx] == NULL ) | 1694 | && idx < (1<<p->bits)) idx++; |
1789 | goto up; | 1695 | |
1790 | } | 1696 | /* Done with this tnode? */ |
1791 | return (struct leaf*) p->child[idx]; | 1697 | if (idx >= (1 << p->bits) || !c) |
1698 | goto up; | ||
1792 | } | 1699 | } |
1700 | return (struct leaf *) c; | ||
1793 | } | 1701 | } |
1794 | up: | 1702 | up: |
1795 | /* No more children go up one step */ | 1703 | /* No more children go up one step */ |
1796 | c = (struct node*) p; | 1704 | c = (struct node *) p; |
1797 | p = (struct tnode *) NODE_PARENT(p); | 1705 | p = (struct tnode *) NODE_PARENT(p); |
1798 | } | 1706 | } |
1799 | return NULL; /* Ready. Root of trie */ | 1707 | return NULL; /* Ready. Root of trie */ |
@@ -1807,23 +1715,24 @@ static int fn_trie_flush(struct fib_table *tb) | |||
1807 | 1715 | ||
1808 | t->revision++; | 1716 | t->revision++; |
1809 | 1717 | ||
1810 | for (h=0; (l = nextleaf(t, l)) != NULL; h++) { | 1718 | rcu_read_lock(); |
1719 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { | ||
1811 | found += trie_flush_leaf(t, l); | 1720 | found += trie_flush_leaf(t, l); |
1812 | 1721 | ||
1813 | if (ll && hlist_empty(&ll->list)) | 1722 | if (ll && hlist_empty(&ll->list)) |
1814 | trie_leaf_remove(t, ll->key); | 1723 | trie_leaf_remove(t, ll->key); |
1815 | ll = l; | 1724 | ll = l; |
1816 | } | 1725 | } |
1726 | rcu_read_unlock(); | ||
1817 | 1727 | ||
1818 | if (ll && hlist_empty(&ll->list)) | 1728 | if (ll && hlist_empty(&ll->list)) |
1819 | trie_leaf_remove(t, ll->key); | 1729 | trie_leaf_remove(t, ll->key); |
1820 | 1730 | ||
1821 | if (trie_debug) | 1731 | pr_debug("trie_flush found=%d\n", found); |
1822 | printk("trie_flush found=%d\n", found); | ||
1823 | return found; | 1732 | return found; |
1824 | } | 1733 | } |
1825 | 1734 | ||
1826 | static int trie_last_dflt=-1; | 1735 | static int trie_last_dflt = -1; |
1827 | 1736 | ||
1828 | static void | 1737 | static void |
1829 | fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | 1738 | fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) |
@@ -1840,7 +1749,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib | |||
1840 | last_resort = NULL; | 1749 | last_resort = NULL; |
1841 | order = -1; | 1750 | order = -1; |
1842 | 1751 | ||
1843 | read_lock(&fib_lock); | 1752 | rcu_read_lock(); |
1844 | 1753 | ||
1845 | l = fib_find_node(t, 0); | 1754 | l = fib_find_node(t, 0); |
1846 | if (!l) | 1755 | if (!l) |
@@ -1853,20 +1762,20 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib | |||
1853 | if (list_empty(fa_head)) | 1762 | if (list_empty(fa_head)) |
1854 | goto out; | 1763 | goto out; |
1855 | 1764 | ||
1856 | list_for_each_entry(fa, fa_head, fa_list) { | 1765 | list_for_each_entry_rcu(fa, fa_head, fa_list) { |
1857 | struct fib_info *next_fi = fa->fa_info; | 1766 | struct fib_info *next_fi = fa->fa_info; |
1858 | 1767 | ||
1859 | if (fa->fa_scope != res->scope || | 1768 | if (fa->fa_scope != res->scope || |
1860 | fa->fa_type != RTN_UNICAST) | 1769 | fa->fa_type != RTN_UNICAST) |
1861 | continue; | 1770 | continue; |
1862 | 1771 | ||
1863 | if (next_fi->fib_priority > res->fi->fib_priority) | 1772 | if (next_fi->fib_priority > res->fi->fib_priority) |
1864 | break; | 1773 | break; |
1865 | if (!next_fi->fib_nh[0].nh_gw || | 1774 | if (!next_fi->fib_nh[0].nh_gw || |
1866 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | 1775 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) |
1867 | continue; | 1776 | continue; |
1868 | fa->fa_state |= FA_S_ACCESSED; | 1777 | fa->fa_state |= FA_S_ACCESSED; |
1869 | 1778 | ||
1870 | if (fi == NULL) { | 1779 | if (fi == NULL) { |
1871 | if (next_fi != res->fi) | 1780 | if (next_fi != res->fi) |
1872 | break; | 1781 | break; |
@@ -1904,7 +1813,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib | |||
1904 | } | 1813 | } |
1905 | trie_last_dflt = last_idx; | 1814 | trie_last_dflt = last_idx; |
1906 | out:; | 1815 | out:; |
1907 | read_unlock(&fib_lock); | 1816 | rcu_read_unlock(); |
1908 | } | 1817 | } |
1909 | 1818 | ||
1910 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, | 1819 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, |
@@ -1913,12 +1822,14 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1913 | int i, s_i; | 1822 | int i, s_i; |
1914 | struct fib_alias *fa; | 1823 | struct fib_alias *fa; |
1915 | 1824 | ||
1916 | u32 xkey=htonl(key); | 1825 | u32 xkey = htonl(key); |
1917 | 1826 | ||
1918 | s_i=cb->args[3]; | 1827 | s_i = cb->args[3]; |
1919 | i = 0; | 1828 | i = 0; |
1920 | 1829 | ||
1921 | list_for_each_entry(fa, fah, fa_list) { | 1830 | /* rcu_read_lock is hold by caller */ |
1831 | |||
1832 | list_for_each_entry_rcu(fa, fah, fa_list) { | ||
1922 | if (i < s_i) { | 1833 | if (i < s_i) { |
1923 | i++; | 1834 | i++; |
1924 | continue; | 1835 | continue; |
@@ -1946,10 +1857,10 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1946 | fa->fa_info, 0) < 0) { | 1857 | fa->fa_info, 0) < 0) { |
1947 | cb->args[3] = i; | 1858 | cb->args[3] = i; |
1948 | return -1; | 1859 | return -1; |
1949 | } | 1860 | } |
1950 | i++; | 1861 | i++; |
1951 | } | 1862 | } |
1952 | cb->args[3]=i; | 1863 | cb->args[3] = i; |
1953 | return skb->len; | 1864 | return skb->len; |
1954 | } | 1865 | } |
1955 | 1866 | ||
@@ -1959,10 +1870,10 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1959 | int h, s_h; | 1870 | int h, s_h; |
1960 | struct list_head *fa_head; | 1871 | struct list_head *fa_head; |
1961 | struct leaf *l = NULL; | 1872 | struct leaf *l = NULL; |
1962 | s_h=cb->args[2]; | ||
1963 | 1873 | ||
1964 | for (h=0; (l = nextleaf(t, l)) != NULL; h++) { | 1874 | s_h = cb->args[2]; |
1965 | 1875 | ||
1876 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { | ||
1966 | if (h < s_h) | 1877 | if (h < s_h) |
1967 | continue; | 1878 | continue; |
1968 | if (h > s_h) | 1879 | if (h > s_h) |
@@ -1970,7 +1881,7 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1970 | sizeof(cb->args) - 3*sizeof(cb->args[0])); | 1881 | sizeof(cb->args) - 3*sizeof(cb->args[0])); |
1971 | 1882 | ||
1972 | fa_head = get_fa_head(l, plen); | 1883 | fa_head = get_fa_head(l, plen); |
1973 | 1884 | ||
1974 | if (!fa_head) | 1885 | if (!fa_head) |
1975 | continue; | 1886 | continue; |
1976 | 1887 | ||
@@ -1978,11 +1889,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1978 | continue; | 1889 | continue; |
1979 | 1890 | ||
1980 | if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { | 1891 | if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { |
1981 | cb->args[2]=h; | 1892 | cb->args[2] = h; |
1982 | return -1; | 1893 | return -1; |
1983 | } | 1894 | } |
1984 | } | 1895 | } |
1985 | cb->args[2]=h; | 1896 | cb->args[2] = h; |
1986 | return skb->len; | 1897 | return skb->len; |
1987 | } | 1898 | } |
1988 | 1899 | ||
@@ -1993,25 +1904,24 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin | |||
1993 | 1904 | ||
1994 | s_m = cb->args[1]; | 1905 | s_m = cb->args[1]; |
1995 | 1906 | ||
1996 | read_lock(&fib_lock); | 1907 | rcu_read_lock(); |
1997 | for (m=0; m<=32; m++) { | 1908 | for (m = 0; m <= 32; m++) { |
1998 | |||
1999 | if (m < s_m) | 1909 | if (m < s_m) |
2000 | continue; | 1910 | continue; |
2001 | if (m > s_m) | 1911 | if (m > s_m) |
2002 | memset(&cb->args[2], 0, | 1912 | memset(&cb->args[2], 0, |
2003 | sizeof(cb->args) - 2*sizeof(cb->args[0])); | 1913 | sizeof(cb->args) - 2*sizeof(cb->args[0])); |
2004 | 1914 | ||
2005 | if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { | 1915 | if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { |
2006 | cb->args[1] = m; | 1916 | cb->args[1] = m; |
2007 | goto out; | 1917 | goto out; |
2008 | } | 1918 | } |
2009 | } | 1919 | } |
2010 | read_unlock(&fib_lock); | 1920 | rcu_read_unlock(); |
2011 | cb->args[1] = m; | 1921 | cb->args[1] = m; |
2012 | return skb->len; | 1922 | return skb->len; |
2013 | out: | 1923 | out: |
2014 | read_unlock(&fib_lock); | 1924 | rcu_read_unlock(); |
2015 | return -1; | 1925 | return -1; |
2016 | } | 1926 | } |
2017 | 1927 | ||
@@ -2051,9 +1961,9 @@ struct fib_table * __init fib_hash_init(int id) | |||
2051 | trie_init(t); | 1961 | trie_init(t); |
2052 | 1962 | ||
2053 | if (id == RT_TABLE_LOCAL) | 1963 | if (id == RT_TABLE_LOCAL) |
2054 | trie_local = t; | 1964 | trie_local = t; |
2055 | else if (id == RT_TABLE_MAIN) | 1965 | else if (id == RT_TABLE_MAIN) |
2056 | trie_main = t; | 1966 | trie_main = t; |
2057 | 1967 | ||
2058 | if (id == RT_TABLE_LOCAL) | 1968 | if (id == RT_TABLE_LOCAL) |
2059 | printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); | 1969 | printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); |
@@ -2065,7 +1975,8 @@ struct fib_table * __init fib_hash_init(int id) | |||
2065 | 1975 | ||
2066 | static void putspace_seq(struct seq_file *seq, int n) | 1976 | static void putspace_seq(struct seq_file *seq, int n) |
2067 | { | 1977 | { |
2068 | while (n--) seq_printf(seq, " "); | 1978 | while (n--) |
1979 | seq_printf(seq, " "); | ||
2069 | } | 1980 | } |
2070 | 1981 | ||
2071 | static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) | 1982 | static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) |
@@ -2086,29 +1997,22 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2086 | seq_printf(seq, "%d/", cindex); | 1997 | seq_printf(seq, "%d/", cindex); |
2087 | printbin_seq(seq, cindex, bits); | 1998 | printbin_seq(seq, cindex, bits); |
2088 | seq_printf(seq, ": "); | 1999 | seq_printf(seq, ": "); |
2089 | } | 2000 | } else |
2090 | else | ||
2091 | seq_printf(seq, "<root>: "); | 2001 | seq_printf(seq, "<root>: "); |
2092 | seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); | 2002 | seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); |
2093 | 2003 | ||
2094 | if (IS_LEAF(n)) | ||
2095 | seq_printf(seq, "key=%d.%d.%d.%d\n", | ||
2096 | n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); | ||
2097 | else { | ||
2098 | int plen = ((struct tnode *)n)->pos; | ||
2099 | t_key prf=MASK_PFX(n->key, plen); | ||
2100 | seq_printf(seq, "key=%d.%d.%d.%d/%d\n", | ||
2101 | prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); | ||
2102 | } | ||
2103 | if (IS_LEAF(n)) { | 2004 | if (IS_LEAF(n)) { |
2104 | struct leaf *l=(struct leaf *)n; | 2005 | struct leaf *l = (struct leaf *)n; |
2105 | struct fib_alias *fa; | 2006 | struct fib_alias *fa; |
2106 | int i; | 2007 | int i; |
2107 | for (i=32; i>=0; i--) | 2008 | |
2108 | if (find_leaf_info(&l->list, i)) { | 2009 | seq_printf(seq, "key=%d.%d.%d.%d\n", |
2109 | 2010 | n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); | |
2011 | |||
2012 | for (i = 32; i >= 0; i--) | ||
2013 | if (find_leaf_info(&l->list, i)) { | ||
2110 | struct list_head *fa_head = get_fa_head(l, i); | 2014 | struct list_head *fa_head = get_fa_head(l, i); |
2111 | 2015 | ||
2112 | if (!fa_head) | 2016 | if (!fa_head) |
2113 | continue; | 2017 | continue; |
2114 | 2018 | ||
@@ -2118,17 +2022,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2118 | putspace_seq(seq, indent+2); | 2022 | putspace_seq(seq, indent+2); |
2119 | seq_printf(seq, "{/%d...dumping}\n", i); | 2023 | seq_printf(seq, "{/%d...dumping}\n", i); |
2120 | 2024 | ||
2121 | 2025 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | |
2122 | list_for_each_entry(fa, fa_head, fa_list) { | ||
2123 | putspace_seq(seq, indent+2); | 2026 | putspace_seq(seq, indent+2); |
2124 | if (fa->fa_info->fib_nh == NULL) { | ||
2125 | seq_printf(seq, "Error _fib_nh=NULL\n"); | ||
2126 | continue; | ||
2127 | } | ||
2128 | if (fa->fa_info == NULL) { | 2027 | if (fa->fa_info == NULL) { |
2129 | seq_printf(seq, "Error fa_info=NULL\n"); | 2028 | seq_printf(seq, "Error fa_info=NULL\n"); |
2130 | continue; | 2029 | continue; |
2131 | } | 2030 | } |
2031 | if (fa->fa_info->fib_nh == NULL) { | ||
2032 | seq_printf(seq, "Error _fib_nh=NULL\n"); | ||
2033 | continue; | ||
2034 | } | ||
2132 | 2035 | ||
2133 | seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", | 2036 | seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", |
2134 | fa->fa_type, | 2037 | fa->fa_type, |
@@ -2136,11 +2039,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2136 | fa->fa_tos); | 2039 | fa->fa_tos); |
2137 | } | 2040 | } |
2138 | } | 2041 | } |
2139 | } | 2042 | } else { |
2140 | else if (IS_TNODE(n)) { | ||
2141 | struct tnode *tn = (struct tnode *)n; | 2043 | struct tnode *tn = (struct tnode *)n; |
2044 | int plen = ((struct tnode *)n)->pos; | ||
2045 | t_key prf = MASK_PFX(n->key, plen); | ||
2046 | |||
2047 | seq_printf(seq, "key=%d.%d.%d.%d/%d\n", | ||
2048 | prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); | ||
2049 | |||
2142 | putspace_seq(seq, indent); seq_printf(seq, "| "); | 2050 | putspace_seq(seq, indent); seq_printf(seq, "| "); |
2143 | seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos)); | 2051 | seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); |
2144 | printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); | 2052 | printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); |
2145 | seq_printf(seq, "}\n"); | 2053 | seq_printf(seq, "}\n"); |
2146 | putspace_seq(seq, indent); seq_printf(seq, "| "); | 2054 | putspace_seq(seq, indent); seq_printf(seq, "| "); |
@@ -2154,194 +2062,196 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2154 | 2062 | ||
2155 | static void trie_dump_seq(struct seq_file *seq, struct trie *t) | 2063 | static void trie_dump_seq(struct seq_file *seq, struct trie *t) |
2156 | { | 2064 | { |
2157 | struct node *n = t->trie; | 2065 | struct node *n; |
2158 | int cindex=0; | 2066 | int cindex = 0; |
2159 | int indent=1; | 2067 | int indent = 1; |
2160 | int pend=0; | 2068 | int pend = 0; |
2161 | int depth = 0; | 2069 | int depth = 0; |
2070 | struct tnode *tn; | ||
2162 | 2071 | ||
2163 | read_lock(&fib_lock); | 2072 | rcu_read_lock(); |
2164 | 2073 | n = rcu_dereference(t->trie); | |
2165 | seq_printf(seq, "------ trie_dump of t=%p ------\n", t); | 2074 | seq_printf(seq, "------ trie_dump of t=%p ------\n", t); |
2166 | if (n) { | ||
2167 | printnode_seq(seq, indent, n, pend, cindex, 0); | ||
2168 | if (IS_TNODE(n)) { | ||
2169 | struct tnode *tn = (struct tnode *)n; | ||
2170 | pend = tn->pos+tn->bits; | ||
2171 | putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); | ||
2172 | indent += 3; | ||
2173 | depth++; | ||
2174 | |||
2175 | while (tn && cindex < (1 << tn->bits)) { | ||
2176 | if (tn->child[cindex]) { | ||
2177 | |||
2178 | /* Got a child */ | ||
2179 | |||
2180 | printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); | ||
2181 | if (IS_LEAF(tn->child[cindex])) { | ||
2182 | cindex++; | ||
2183 | |||
2184 | } | ||
2185 | else { | ||
2186 | /* | ||
2187 | * New tnode. Decend one level | ||
2188 | */ | ||
2189 | |||
2190 | depth++; | ||
2191 | n = tn->child[cindex]; | ||
2192 | tn = (struct tnode *)n; | ||
2193 | pend = tn->pos+tn->bits; | ||
2194 | putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); | ||
2195 | indent+=3; | ||
2196 | cindex=0; | ||
2197 | } | ||
2198 | } | ||
2199 | else | ||
2200 | cindex++; | ||
2201 | 2075 | ||
2076 | if (!n) { | ||
2077 | seq_printf(seq, "------ trie is empty\n"); | ||
2078 | |||
2079 | rcu_read_unlock(); | ||
2080 | return; | ||
2081 | } | ||
2082 | |||
2083 | printnode_seq(seq, indent, n, pend, cindex, 0); | ||
2084 | |||
2085 | if (!IS_TNODE(n)) { | ||
2086 | rcu_read_unlock(); | ||
2087 | return; | ||
2088 | } | ||
2089 | |||
2090 | tn = (struct tnode *)n; | ||
2091 | pend = tn->pos+tn->bits; | ||
2092 | putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); | ||
2093 | indent += 3; | ||
2094 | depth++; | ||
2095 | |||
2096 | while (tn && cindex < (1 << tn->bits)) { | ||
2097 | struct node *child = rcu_dereference(tn->child[cindex]); | ||
2098 | if (!child) | ||
2099 | cindex++; | ||
2100 | else { | ||
2101 | /* Got a child */ | ||
2102 | printnode_seq(seq, indent, child, pend, | ||
2103 | cindex, tn->bits); | ||
2104 | |||
2105 | if (IS_LEAF(child)) | ||
2106 | cindex++; | ||
2107 | |||
2108 | else { | ||
2202 | /* | 2109 | /* |
2203 | * Test if we are done | 2110 | * New tnode. Decend one level |
2204 | */ | 2111 | */ |
2205 | |||
2206 | while (cindex >= (1 << tn->bits)) { | ||
2207 | 2112 | ||
2208 | /* | 2113 | depth++; |
2209 | * Move upwards and test for root | 2114 | n = child; |
2210 | * pop off all traversed nodes | 2115 | tn = (struct tnode *)n; |
2211 | */ | 2116 | pend = tn->pos+tn->bits; |
2212 | 2117 | putspace_seq(seq, indent); | |
2213 | if (NODE_PARENT(tn) == NULL) { | 2118 | seq_printf(seq, "\\--\n"); |
2214 | tn = NULL; | 2119 | indent += 3; |
2215 | n = NULL; | 2120 | cindex = 0; |
2216 | break; | ||
2217 | } | ||
2218 | else { | ||
2219 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2220 | tn = NODE_PARENT(tn); | ||
2221 | cindex++; | ||
2222 | n = (struct node *)tn; | ||
2223 | pend = tn->pos+tn->bits; | ||
2224 | indent-=3; | ||
2225 | depth--; | ||
2226 | } | ||
2227 | } | ||
2228 | } | 2121 | } |
2229 | } | 2122 | } |
2230 | else n = NULL; | ||
2231 | } | ||
2232 | else seq_printf(seq, "------ trie is empty\n"); | ||
2233 | 2123 | ||
2234 | read_unlock(&fib_lock); | 2124 | /* |
2125 | * Test if we are done | ||
2126 | */ | ||
2127 | |||
2128 | while (cindex >= (1 << tn->bits)) { | ||
2129 | /* | ||
2130 | * Move upwards and test for root | ||
2131 | * pop off all traversed nodes | ||
2132 | */ | ||
2133 | |||
2134 | if (NODE_PARENT(tn) == NULL) { | ||
2135 | tn = NULL; | ||
2136 | break; | ||
2137 | } | ||
2138 | |||
2139 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2140 | cindex++; | ||
2141 | tn = NODE_PARENT(tn); | ||
2142 | pend = tn->pos + tn->bits; | ||
2143 | indent -= 3; | ||
2144 | depth--; | ||
2145 | } | ||
2146 | } | ||
2147 | rcu_read_unlock(); | ||
2235 | } | 2148 | } |
2236 | 2149 | ||
2237 | static struct trie_stat *trie_stat_new(void) | 2150 | static struct trie_stat *trie_stat_new(void) |
2238 | { | 2151 | { |
2239 | struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); | 2152 | struct trie_stat *s; |
2240 | int i; | 2153 | int i; |
2241 | 2154 | ||
2242 | if (s) { | 2155 | s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); |
2243 | s->totdepth = 0; | 2156 | if (!s) |
2244 | s->maxdepth = 0; | 2157 | return NULL; |
2245 | s->tnodes = 0; | 2158 | |
2246 | s->leaves = 0; | 2159 | s->totdepth = 0; |
2247 | s->nullpointers = 0; | 2160 | s->maxdepth = 0; |
2248 | 2161 | s->tnodes = 0; | |
2249 | for(i=0; i< MAX_CHILDS; i++) | 2162 | s->leaves = 0; |
2250 | s->nodesizes[i] = 0; | 2163 | s->nullpointers = 0; |
2251 | } | 2164 | |
2165 | for (i = 0; i < MAX_CHILDS; i++) | ||
2166 | s->nodesizes[i] = 0; | ||
2167 | |||
2252 | return s; | 2168 | return s; |
2253 | } | 2169 | } |
2254 | 2170 | ||
2255 | static struct trie_stat *trie_collect_stats(struct trie *t) | 2171 | static struct trie_stat *trie_collect_stats(struct trie *t) |
2256 | { | 2172 | { |
2257 | struct node *n = t->trie; | 2173 | struct node *n; |
2258 | struct trie_stat *s = trie_stat_new(); | 2174 | struct trie_stat *s = trie_stat_new(); |
2259 | int cindex = 0; | 2175 | int cindex = 0; |
2260 | int indent = 1; | ||
2261 | int pend = 0; | 2176 | int pend = 0; |
2262 | int depth = 0; | 2177 | int depth = 0; |
2263 | 2178 | ||
2264 | read_lock(&fib_lock); | 2179 | if (!s) |
2180 | return NULL; | ||
2265 | 2181 | ||
2266 | if (s) { | 2182 | rcu_read_lock(); |
2267 | if (n) { | 2183 | n = rcu_dereference(t->trie); |
2268 | if (IS_TNODE(n)) { | ||
2269 | struct tnode *tn = (struct tnode *)n; | ||
2270 | pend = tn->pos+tn->bits; | ||
2271 | indent += 3; | ||
2272 | s->nodesizes[tn->bits]++; | ||
2273 | depth++; | ||
2274 | 2184 | ||
2275 | while (tn && cindex < (1 << tn->bits)) { | 2185 | if (!n) |
2276 | if (tn->child[cindex]) { | 2186 | return s; |
2277 | /* Got a child */ | 2187 | |
2278 | 2188 | if (IS_TNODE(n)) { | |
2279 | if (IS_LEAF(tn->child[cindex])) { | 2189 | struct tnode *tn = (struct tnode *)n; |
2280 | cindex++; | 2190 | pend = tn->pos+tn->bits; |
2281 | 2191 | s->nodesizes[tn->bits]++; | |
2282 | /* stats */ | 2192 | depth++; |
2283 | if (depth > s->maxdepth) | 2193 | |
2284 | s->maxdepth = depth; | 2194 | while (tn && cindex < (1 << tn->bits)) { |
2285 | s->totdepth += depth; | 2195 | struct node *ch = rcu_dereference(tn->child[cindex]); |
2286 | s->leaves++; | 2196 | if (ch) { |
2287 | } | ||
2288 | |||
2289 | else { | ||
2290 | /* | ||
2291 | * New tnode. Decend one level | ||
2292 | */ | ||
2293 | |||
2294 | s->tnodes++; | ||
2295 | s->nodesizes[tn->bits]++; | ||
2296 | depth++; | ||
2297 | |||
2298 | n = tn->child[cindex]; | ||
2299 | tn = (struct tnode *)n; | ||
2300 | pend = tn->pos+tn->bits; | ||
2301 | |||
2302 | indent += 3; | ||
2303 | cindex = 0; | ||
2304 | } | ||
2305 | } | ||
2306 | else { | ||
2307 | cindex++; | ||
2308 | s->nullpointers++; | ||
2309 | } | ||
2310 | 2197 | ||
2198 | /* Got a child */ | ||
2199 | |||
2200 | if (IS_LEAF(tn->child[cindex])) { | ||
2201 | cindex++; | ||
2202 | |||
2203 | /* stats */ | ||
2204 | if (depth > s->maxdepth) | ||
2205 | s->maxdepth = depth; | ||
2206 | s->totdepth += depth; | ||
2207 | s->leaves++; | ||
2208 | } else { | ||
2311 | /* | 2209 | /* |
2312 | * Test if we are done | 2210 | * New tnode. Decend one level |
2313 | */ | 2211 | */ |
2314 | 2212 | ||
2315 | while (cindex >= (1 << tn->bits)) { | 2213 | s->tnodes++; |
2316 | 2214 | s->nodesizes[tn->bits]++; | |
2317 | /* | 2215 | depth++; |
2318 | * Move upwards and test for root | 2216 | |
2319 | * pop off all traversed nodes | 2217 | n = ch; |
2320 | */ | 2218 | tn = (struct tnode *)n; |
2321 | 2219 | pend = tn->pos+tn->bits; | |
2322 | 2220 | ||
2323 | if (NODE_PARENT(tn) == NULL) { | 2221 | cindex = 0; |
2324 | tn = NULL; | ||
2325 | n = NULL; | ||
2326 | break; | ||
2327 | } | ||
2328 | else { | ||
2329 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2330 | tn = NODE_PARENT(tn); | ||
2331 | cindex++; | ||
2332 | n = (struct node *)tn; | ||
2333 | pend = tn->pos+tn->bits; | ||
2334 | indent -= 3; | ||
2335 | depth--; | ||
2336 | } | ||
2337 | } | ||
2338 | } | 2222 | } |
2223 | } else { | ||
2224 | cindex++; | ||
2225 | s->nullpointers++; | ||
2339 | } | 2226 | } |
2340 | else n = NULL; | 2227 | |
2228 | /* | ||
2229 | * Test if we are done | ||
2230 | */ | ||
2231 | |||
2232 | while (cindex >= (1 << tn->bits)) { | ||
2233 | /* | ||
2234 | * Move upwards and test for root | ||
2235 | * pop off all traversed nodes | ||
2236 | */ | ||
2237 | |||
2238 | if (NODE_PARENT(tn) == NULL) { | ||
2239 | tn = NULL; | ||
2240 | n = NULL; | ||
2241 | break; | ||
2242 | } | ||
2243 | |||
2244 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2245 | tn = NODE_PARENT(tn); | ||
2246 | cindex++; | ||
2247 | n = (struct node *)tn; | ||
2248 | pend = tn->pos+tn->bits; | ||
2249 | depth--; | ||
2250 | } | ||
2341 | } | 2251 | } |
2342 | } | 2252 | } |
2343 | 2253 | ||
2344 | read_unlock(&fib_lock); | 2254 | rcu_read_unlock(); |
2345 | return s; | 2255 | return s; |
2346 | } | 2256 | } |
2347 | 2257 | ||
@@ -2359,17 +2269,22 @@ static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) | |||
2359 | 2269 | ||
2360 | static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) | 2270 | static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) |
2361 | { | 2271 | { |
2362 | void *v = NULL; | 2272 | if (!ip_fib_main_table) |
2273 | return NULL; | ||
2363 | 2274 | ||
2364 | if (ip_fib_main_table) | 2275 | if (*pos) |
2365 | v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN; | 2276 | return fib_triestat_get_next(seq); |
2366 | return v; | 2277 | else |
2278 | return SEQ_START_TOKEN; | ||
2367 | } | 2279 | } |
2368 | 2280 | ||
2369 | static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2281 | static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2370 | { | 2282 | { |
2371 | ++*pos; | 2283 | ++*pos; |
2372 | return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq); | 2284 | if (v == SEQ_START_TOKEN) |
2285 | return fib_triestat_get_first(seq); | ||
2286 | else | ||
2287 | return fib_triestat_get_next(seq); | ||
2373 | } | 2288 | } |
2374 | 2289 | ||
2375 | static void fib_triestat_seq_stop(struct seq_file *seq, void *v) | 2290 | static void fib_triestat_seq_stop(struct seq_file *seq, void *v) |
@@ -2388,22 +2303,22 @@ static void collect_and_show(struct trie *t, struct seq_file *seq) | |||
2388 | { | 2303 | { |
2389 | int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ | 2304 | int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ |
2390 | int i, max, pointers; | 2305 | int i, max, pointers; |
2391 | struct trie_stat *stat; | 2306 | struct trie_stat *stat; |
2392 | int avdepth; | 2307 | int avdepth; |
2393 | 2308 | ||
2394 | stat = trie_collect_stats(t); | 2309 | stat = trie_collect_stats(t); |
2395 | 2310 | ||
2396 | bytes=0; | 2311 | bytes = 0; |
2397 | seq_printf(seq, "trie=%p\n", t); | 2312 | seq_printf(seq, "trie=%p\n", t); |
2398 | 2313 | ||
2399 | if (stat) { | 2314 | if (stat) { |
2400 | if (stat->leaves) | 2315 | if (stat->leaves) |
2401 | avdepth=stat->totdepth*100 / stat->leaves; | 2316 | avdepth = stat->totdepth*100 / stat->leaves; |
2402 | else | 2317 | else |
2403 | avdepth=0; | 2318 | avdepth = 0; |
2404 | seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); | 2319 | seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); |
2405 | seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); | 2320 | seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); |
2406 | 2321 | ||
2407 | seq_printf(seq, "Leaves: %d\n", stat->leaves); | 2322 | seq_printf(seq, "Leaves: %d\n", stat->leaves); |
2408 | bytes += sizeof(struct leaf) * stat->leaves; | 2323 | bytes += sizeof(struct leaf) * stat->leaves; |
2409 | seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); | 2324 | seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); |
@@ -2455,11 +2370,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) | |||
2455 | 2370 | ||
2456 | if (trie_main) | 2371 | if (trie_main) |
2457 | collect_and_show(trie_main, seq); | 2372 | collect_and_show(trie_main, seq); |
2458 | } | 2373 | } else { |
2459 | else { | 2374 | snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); |
2460 | snprintf(bf, sizeof(bf), | 2375 | |
2461 | "*\t%08X\t%08X", 200, 400); | ||
2462 | |||
2463 | seq_printf(seq, "%-127s\n", bf); | 2376 | seq_printf(seq, "%-127s\n", bf); |
2464 | } | 2377 | } |
2465 | return 0; | 2378 | return 0; |
@@ -2520,22 +2433,27 @@ static struct fib_alias *fib_trie_get_next(struct seq_file *seq) | |||
2520 | 2433 | ||
2521 | static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) | 2434 | static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) |
2522 | { | 2435 | { |
2523 | void *v = NULL; | 2436 | if (!ip_fib_main_table) |
2437 | return NULL; | ||
2524 | 2438 | ||
2525 | if (ip_fib_main_table) | 2439 | if (*pos) |
2526 | v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN; | 2440 | return fib_trie_get_next(seq); |
2527 | return v; | 2441 | else |
2442 | return SEQ_START_TOKEN; | ||
2528 | } | 2443 | } |
2529 | 2444 | ||
2530 | static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2445 | static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2531 | { | 2446 | { |
2532 | ++*pos; | 2447 | ++*pos; |
2533 | return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq); | 2448 | if (v == SEQ_START_TOKEN) |
2449 | return fib_trie_get_first(seq); | ||
2450 | else | ||
2451 | return fib_trie_get_next(seq); | ||
2452 | |||
2534 | } | 2453 | } |
2535 | 2454 | ||
2536 | static void fib_trie_seq_stop(struct seq_file *seq, void *v) | 2455 | static void fib_trie_seq_stop(struct seq_file *seq, void *v) |
2537 | { | 2456 | { |
2538 | |||
2539 | } | 2457 | } |
2540 | 2458 | ||
2541 | /* | 2459 | /* |
@@ -2555,9 +2473,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2555 | 2473 | ||
2556 | if (trie_main) | 2474 | if (trie_main) |
2557 | trie_dump_seq(seq, trie_main); | 2475 | trie_dump_seq(seq, trie_main); |
2558 | } | 2476 | } else { |
2559 | |||
2560 | else { | ||
2561 | snprintf(bf, sizeof(bf), | 2477 | snprintf(bf, sizeof(bf), |
2562 | "*\t%08X\t%08X", 200, 400); | 2478 | "*\t%08X\t%08X", 200, 400); |
2563 | seq_printf(seq, "%-127s\n", bf); | 2479 | seq_printf(seq, "%-127s\n", bf); |