 net/ipv4/fib_hash.c   | 176
 net/ipv4/fib_lookup.h |   2
 2 files changed, 101 insertions(+), 77 deletions(-)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 04f05a96b75b..4f1aafd3ba89 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -58,7 +58,8 @@ struct fib_node {
 
 struct fn_zone {
 	struct fn_zone __rcu	*fz_next;	/* Next not empty zone	*/
-	struct hlist_head	*fz_hash;	/* Hash table pointer	*/
+	struct hlist_head __rcu *fz_hash;	/* Hash table pointer	*/
+	seqlock_t		fz_lock;
 	u32			fz_hashmask;	/* (fz_divisor - 1)	*/
 
 	u8			fz_order;	/* Zone order (0..32)	*/
@@ -92,7 +93,6 @@ static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
 	return dst & FZ_MASK(fz);
 }
 
-static DEFINE_RWLOCK(fib_hash_lock);
 static unsigned int fib_hash_genid;
 
 #define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
@@ -101,12 +101,11 @@ static struct hlist_head *fz_hash_alloc(int divisor)
 {
 	unsigned long size = divisor * sizeof(struct hlist_head);
 
-	if (size <= PAGE_SIZE) {
+	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
-	} else {
-		return (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
-	}
+
+	return (struct hlist_head *)
+		__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
 }
 
 /* The fib hash lock must be held when this is called. */
@@ -121,12 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
 		struct fib_node *f;
 
 		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
-			struct hlist_head *new_head;
+			struct hlist_head __rcu *new_head;
 
-			hlist_del(&f->fn_hash);
+			hlist_del_rcu(&f->fn_hash);
 
 			new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
-			hlist_add_head(&f->fn_hash, new_head);
+			hlist_add_head_rcu(&f->fn_hash, new_head);
 		}
 	}
 }
@@ -175,32 +174,55 @@ static void fn_rehash_zone(struct fn_zone *fz)
 	ht = fz_hash_alloc(new_divisor);
 
 	if (ht) {
-		write_lock_bh(&fib_hash_lock);
+		struct fn_zone nfz;
+
+		memcpy(&nfz, fz, sizeof(nfz));
+
+		write_seqlock_bh(&fz->fz_lock);
 		old_ht = fz->fz_hash;
-		fz->fz_hash = ht;
+		nfz.fz_hash = ht;
+		nfz.fz_hashmask = new_hashmask;
+		nfz.fz_divisor = new_divisor;
+		fn_rebuild_zone(&nfz, old_ht, old_divisor);
+		fib_hash_genid++;
+		rcu_assign_pointer(fz->fz_hash, ht);
 		fz->fz_hashmask = new_hashmask;
 		fz->fz_divisor = new_divisor;
-		fn_rebuild_zone(fz, old_ht, old_divisor);
-		fib_hash_genid++;
-		write_unlock_bh(&fib_hash_lock);
+		write_sequnlock_bh(&fz->fz_lock);
 
-		if (old_ht != fz->fz_embedded_hash)
+		if (old_ht != fz->fz_embedded_hash) {
+			synchronize_rcu();
 			fz_hash_free(old_ht, old_divisor);
+		}
 	}
 }
 
-static inline void fn_free_node(struct fib_node * f)
+static void fn_free_node_rcu(struct rcu_head *head)
 {
+	struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
+
 	kmem_cache_free(fn_hash_kmem, f);
 }
 
+static inline void fn_free_node(struct fib_node *f)
+{
+	call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
+}
+
+static void fn_free_alias_rcu(struct rcu_head *head)
+{
+	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
+
+	kmem_cache_free(fn_alias_kmem, fa);
+}
+
 static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
 {
 	fib_release_info(fa->fa_info);
 	if (fa == &f->fn_embedded_alias)
 		fa->fa_info = NULL;
 	else
-		kmem_cache_free(fn_alias_kmem, fa);
+		call_rcu(&fa->rcu, fn_free_alias_rcu);
 }
 
 static struct fn_zone *
@@ -211,6 +233,7 @@ fn_new_zone(struct fn_hash *table, int z)
 	if (!fz)
 		return NULL;
 
+	seqlock_init(&fz->fz_lock);
 	fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
 	fz->fz_hashmask = fz->fz_divisor - 1;
 	fz->fz_hash = fz->fz_embedded_hash;
@@ -246,30 +269,34 @@ int fib_table_lookup(struct fib_table *tb,
 	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
 
 	rcu_read_lock();
-	read_lock(&fib_hash_lock);
 	for (fz = rcu_dereference(t->fn_zone_list);
 	     fz != NULL;
 	     fz = rcu_dereference(fz->fz_next)) {
-		struct hlist_head *head;
+		struct hlist_head __rcu *head;
 		struct hlist_node *node;
 		struct fib_node *f;
-		__be32 k = fz_key(flp->fl4_dst, fz);
+		__be32 k;
+		unsigned int seq;
 
-		head = &fz->fz_hash[fn_hash(k, fz)];
-		hlist_for_each_entry(f, node, head, fn_hash) {
-			if (f->fn_key != k)
-				continue;
+		do {
+			seq = read_seqbegin(&fz->fz_lock);
+			k = fz_key(flp->fl4_dst, fz);
+
+			head = &fz->fz_hash[fn_hash(k, fz)];
+			hlist_for_each_entry_rcu(f, node, head, fn_hash) {
+				if (f->fn_key != k)
+					continue;
 
-			err = fib_semantic_match(&f->fn_alias,
+				err = fib_semantic_match(&f->fn_alias,
 						 flp, res,
 						 fz->fz_order, fib_flags);
-			if (err <= 0)
-				goto out;
-		}
+				if (err <= 0)
+					goto out;
+			}
+		} while (read_seqretry(&fz->fz_lock, seq));
 	}
 	err = 1;
 out:
-	read_unlock(&fib_hash_lock);
 	rcu_read_unlock();
 	return err;
 }
@@ -292,11 +319,11 @@ void fib_table_select_default(struct fib_table *tb,
 	last_resort = NULL;
 	order = -1;
 
-	read_lock(&fib_hash_lock);
-	hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
 		struct fib_alias *fa;
 
-		list_for_each_entry(fa, &f->fn_alias, fa_list) {
+		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
 			struct fib_info *next_fi = fa->fa_info;
 
 			if (fa->fa_scope != res->scope ||
@@ -340,7 +367,7 @@ void fib_table_select_default(struct fib_table *tb,
 		fib_result_assign(res, last_resort);
 	tb->tb_default = last_idx;
 out:
-	read_unlock(&fib_hash_lock);
+	rcu_read_unlock();
 }
 
 /* Insert node F to FZ. */
@@ -348,7 +375,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
 {
 	struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
 
-	hlist_add_head(&f->fn_hash, head);
+	hlist_add_head_rcu(&f->fn_hash, head);
 }
 
 /* Return the node in FZ matching KEY. */
@@ -358,7 +385,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 	struct hlist_node *node;
 	struct fib_node *f;
 
-	hlist_for_each_entry(f, node, head, fn_hash) {
+	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
 		if (f->fn_key == key)
 			return f;
 	}
@@ -366,6 +393,16 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 	return NULL;
 }
 
+
+static struct fib_alias *fib_fast_alloc(struct fib_node *f)
+{
+	struct fib_alias *fa = &f->fn_embedded_alias;
+
+	if (fa->fa_info != NULL)
+		fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
+	return fa;
+}
+
 /* Caller must hold RTNL. */
 int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
@@ -451,7 +488,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	}
 
 	if (cfg->fc_nlflags & NLM_F_REPLACE) {
-		struct fib_info *fi_drop;
 		u8 state;
 
 		fa = fa_first;
@@ -460,21 +496,25 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			err = 0;
 			goto out;
 		}
-		write_lock_bh(&fib_hash_lock);
-		fi_drop = fa->fa_info;
-		fa->fa_info = fi;
-		fa->fa_type = cfg->fc_type;
-		fa->fa_scope = cfg->fc_scope;
+		err = -ENOBUFS;
+		new_fa = fib_fast_alloc(f);
+		if (new_fa == NULL)
+			goto out;
+
+		new_fa->fa_tos = fa->fa_tos;
+		new_fa->fa_info = fi;
+		new_fa->fa_type = cfg->fc_type;
+		new_fa->fa_scope = cfg->fc_scope;
 		state = fa->fa_state;
-		fa->fa_state &= ~FA_S_ACCESSED;
+		new_fa->fa_state = state & ~FA_S_ACCESSED;
 		fib_hash_genid++;
-		write_unlock_bh(&fib_hash_lock);
+		list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 
-		fib_release_info(fi_drop);
+		fn_free_alias(fa, f);
 		if (state & FA_S_ACCESSED)
 			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-		rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
-			  &cfg->fc_nlinfo, NLM_F_REPLACE);
+		rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
+			  tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 		return 0;
 	}
 
@@ -506,12 +546,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		f = new_f;
 	}
 
-	new_fa = &f->fn_embedded_alias;
-	if (new_fa->fa_info != NULL) {
-		new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
-		if (new_fa == NULL)
-			goto out;
-	}
+	new_fa = fib_fast_alloc(f);
+	if (new_fa == NULL)
+		goto out;
+
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
@@ -522,13 +560,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	 * Insert new entry to the list.
 	 */
 
-	write_lock_bh(&fib_hash_lock);
 	if (new_f)
 		fib_insert_node(fz, new_f);
-	list_add_tail(&new_fa->fa_list,
+	list_add_tail_rcu(&new_fa->fa_list,
 		      (fa ? &fa->fa_list : &f->fn_alias));
 	fib_hash_genid++;
-	write_unlock_bh(&fib_hash_lock);
 
 	if (new_f)
 		fz->fz_nent++;
@@ -603,14 +639,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 			  tb->tb_id, &cfg->fc_nlinfo, 0);
 
 		kill_fn = 0;
-		write_lock_bh(&fib_hash_lock);
-		list_del(&fa->fa_list);
+		list_del_rcu(&fa->fa_list);
 		if (list_empty(&f->fn_alias)) {
-			hlist_del(&f->fn_hash);
+			hlist_del_rcu(&f->fn_hash);
 			kill_fn = 1;
 		}
 		fib_hash_genid++;
-		write_unlock_bh(&fib_hash_lock);
 
 		if (fa->fa_state & FA_S_ACCESSED)
 			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
@@ -641,14 +675,12 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
 			struct fib_info *fi = fa->fa_info;
 
 			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
-				write_lock_bh(&fib_hash_lock);
-				list_del(&fa->fa_list);
+				list_del_rcu(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
-					hlist_del(&f->fn_hash);
+					hlist_del_rcu(&f->fn_hash);
 					kill_f = 1;
 				}
 				fib_hash_genid++;
-				write_unlock_bh(&fib_hash_lock);
 
 				fn_free_alias(fa, f);
 				found++;
@@ -693,10 +725,10 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 
 	s_i = cb->args[4];
 	i = 0;
-	hlist_for_each_entry(f, node, head, fn_hash) {
+	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
 		struct fib_alias *fa;
 
-		list_for_each_entry(fa, &f->fn_alias, fa_list) {
+		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
 			if (i < s_i)
 				goto next;
 
@@ -714,7 +746,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 				cb->args[4] = i;
 				return -1;
 			}
-		next:
+next:
 			i++;
 		}
 	}
@@ -755,7 +787,6 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 
 	s_m = cb->args[2];
 	rcu_read_lock();
-	read_lock(&fib_hash_lock);
 	for (fz = rcu_dereference(table->fn_zone_list);
 	     fz != NULL;
 	     fz = rcu_dereference(fz->fz_next), m++) {
@@ -763,14 +794,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 			continue;
 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
 			cb->args[2] = m;
-			read_unlock(&fib_hash_lock);
 			rcu_read_unlock();
 			return -1;
 		}
 		memset(&cb->args[3], 0,
 		       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 	}
-	read_unlock(&fib_hash_lock);
 	rcu_read_unlock();
 	cb->args[2] = m;
 	return skb->len;
@@ -960,13 +989,11 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(fib_hash_lock)
 	__acquires(RCU)
 {
 	void *v = NULL;
 
 	rcu_read_lock();
-	read_lock(&fib_hash_lock);
 	if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 	return v;
@@ -979,17 +1006,16 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void fib_seq_stop(struct seq_file *seq, void *v)
-	__releases(fib_hash_lock)
 	__releases(RCU)
 {
-	read_unlock(&fib_hash_lock);
 	rcu_read_unlock();
 }
 
 static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
 {
 	static const unsigned type2flags[RTN_MAX + 1] = {
-		[7] = RTF_REJECT, [8] = RTF_REJECT,
+		[7] = RTF_REJECT,
+		[8] = RTF_REJECT,
 	};
 	unsigned flags = type2flags[type];
 
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index b9c9a9f2aee5..5072d8effd5d 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,9 +12,7 @@ struct fib_alias {
 	u8		fa_type;
 	u8		fa_scope;
 	u8		fa_state;
-#ifdef CONFIG_IP_FIB_TRIE
 	struct rcu_head	rcu;
-#endif
 };
 
 #define FA_S_ACCESSED	0x01
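
Editor's note on the locking scheme: the patch replaces the global fib_hash_lock with RCU for list/hash traversal plus a per-zone seqlock that makes fn_rehash_zone() safe against concurrent lookups. Below is a minimal userspace sketch of that reader-retry idea only. Everything here (struct zone, demo_seqbegin, demo_seqretry, zone_lookup, zone_rehash) is illustrative and not kernel code; the kernel's seqlock_t and RCU provide the real memory-ordering and object-lifetime guarantees that this toy omits.

/*
 * Reader-retry sketch: the writer makes the sequence counter odd while it
 * resizes and bumps it back to a new even value when done; a reader redoes
 * its walk whenever the counter moved underneath it, mirroring the
 * read_seqbegin()/read_seqretry() loop added to fib_table_lookup().
 */
#include <stdatomic.h>
#include <stdio.h>

struct zone {
	atomic_uint seq;	/* even: stable, odd: resize in progress */
	unsigned    divisor;	/* plays the role of fz_divisor */
	int	    table[8];	/* stand-in for the hash buckets */
};

static unsigned demo_seqbegin(struct zone *z)	/* cf. read_seqbegin() */
{
	unsigned s;

	while ((s = atomic_load(&z->seq)) & 1)
		;				/* writer active: wait for even */
	return s;
}

static int demo_seqretry(struct zone *z, unsigned start)	/* cf. read_seqretry() */
{
	return atomic_load(&z->seq) != start;	/* a resize ran in between */
}

static int zone_lookup(struct zone *z, unsigned key)
{
	unsigned seq;
	int val;

	do {
		seq = demo_seqbegin(z);
		val = z->table[key % z->divisor];	/* may observe a resize */
	} while (demo_seqretry(z, seq));		/* ...so redo the walk */

	return val;
}

static void zone_rehash(struct zone *z, unsigned new_divisor)	/* cf. fn_rehash_zone() */
{
	atomic_fetch_add(&z->seq, 1);	/* counter goes odd: readers will retry */
	z->divisor = new_divisor;	/* mutate the "hash" parameters */
	atomic_fetch_add(&z->seq, 1);	/* back to even: resize complete */
}

int main(void)
{
	struct zone z = {
		.seq = 0,
		.divisor = 4,
		.table = { 10, 11, 12, 13, 14, 15, 16, 17 },
	};

	printf("lookup(5) before rehash: %d\n", zone_lookup(&z, 5));
	zone_rehash(&z, 8);
	printf("lookup(5) after rehash:  %d\n", zone_lookup(&z, 5));
	return 0;
}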