aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJarek Poplawski <jarkao2@gmail.com>2009-06-15 05:31:29 -0400
committerDavid S. Miller <davem@davemloft.net>2009-06-15 05:31:29 -0400
commite0f7cb8c8cc6cccce28d2ce39ad8c60d23c3799f (patch)
tree204963b92fd4cdd8a73cd133ef36360c0d47014f
parent3c4bdc4bd4af791a72147b6ebc29553808f53cea (diff)
ipv4: Fix fib_trie rebalancing
While doing trie_rebalance(): resize(), inflate(), halve() RCU free tnodes before updating their parents. It depends on RCU delaying the real destruction, but if RCU readers start after call_rcu() and before parent update they could access freed memory. It is currently prevented with preempt_disable() on the update side, but it's not safe, except maybe classic RCU, plus it conflicts with memory allocations with GFP_KERNEL flag used from these functions. This patch explicitly delays freeing of tnodes by adding them to the list, which is flushed after the update is finished. Reported-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Jarek Poplawski <jarkao2@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/fib_trie.c47
1 files changed, 37 insertions, 10 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 538d2a9a5115..d1a39b1277d6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -123,6 +123,7 @@ struct tnode {
123 union { 123 union {
124 struct rcu_head rcu; 124 struct rcu_head rcu;
125 struct work_struct work; 125 struct work_struct work;
126 struct tnode *tnode_free;
126 }; 127 };
127 struct node *child[0]; 128 struct node *child[0];
128}; 129};
@@ -161,6 +162,8 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161static struct node *resize(struct trie *t, struct tnode *tn); 162static struct node *resize(struct trie *t, struct tnode *tn);
162static struct tnode *inflate(struct trie *t, struct tnode *tn); 163static struct tnode *inflate(struct trie *t, struct tnode *tn);
163static struct tnode *halve(struct trie *t, struct tnode *tn); 164static struct tnode *halve(struct trie *t, struct tnode *tn);
165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head;
164 167
165static struct kmem_cache *fn_alias_kmem __read_mostly; 168static struct kmem_cache *fn_alias_kmem __read_mostly;
166static struct kmem_cache *trie_leaf_kmem __read_mostly; 169static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -385,6 +388,29 @@ static inline void tnode_free(struct tnode *tn)
385 call_rcu(&tn->rcu, __tnode_free_rcu); 388 call_rcu(&tn->rcu, __tnode_free_rcu);
386} 389}
387 390
391static void tnode_free_safe(struct tnode *tn)
392{
393 BUG_ON(IS_LEAF(tn));
394
395 if (node_parent((struct node *) tn)) {
396 tn->tnode_free = tnode_free_head;
397 tnode_free_head = tn;
398 } else {
399 tnode_free(tn);
400 }
401}
402
403static void tnode_free_flush(void)
404{
405 struct tnode *tn;
406
407 while ((tn = tnode_free_head)) {
408 tnode_free_head = tn->tnode_free;
409 tn->tnode_free = NULL;
410 tnode_free(tn);
411 }
412}
413
388static struct leaf *leaf_new(void) 414static struct leaf *leaf_new(void)
389{ 415{
390 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 416 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
@@ -495,7 +521,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
495 521
496 /* No children */ 522 /* No children */
497 if (tn->empty_children == tnode_child_length(tn)) { 523 if (tn->empty_children == tnode_child_length(tn)) {
498 tnode_free(tn); 524 tnode_free_safe(tn);
499 return NULL; 525 return NULL;
500 } 526 }
501 /* One child */ 527 /* One child */
@@ -509,7 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
509 535
510 /* compress one level */ 536 /* compress one level */
511 node_set_parent(n, NULL); 537 node_set_parent(n, NULL);
512 tnode_free(tn); 538 tnode_free_safe(tn);
513 return n; 539 return n;
514 } 540 }
515 /* 541 /*
@@ -670,7 +696,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
670 /* compress one level */ 696 /* compress one level */
671 697
672 node_set_parent(n, NULL); 698 node_set_parent(n, NULL);
673 tnode_free(tn); 699 tnode_free_safe(tn);
674 return n; 700 return n;
675 } 701 }
676 702
@@ -756,7 +782,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
756 put_child(t, tn, 2*i, inode->child[0]); 782 put_child(t, tn, 2*i, inode->child[0]);
757 put_child(t, tn, 2*i+1, inode->child[1]); 783 put_child(t, tn, 2*i+1, inode->child[1]);
758 784
759 tnode_free(inode); 785 tnode_free_safe(inode);
760 continue; 786 continue;
761 } 787 }
762 788
@@ -801,9 +827,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
801 put_child(t, tn, 2*i, resize(t, left)); 827 put_child(t, tn, 2*i, resize(t, left));
802 put_child(t, tn, 2*i+1, resize(t, right)); 828 put_child(t, tn, 2*i+1, resize(t, right));
803 829
804 tnode_free(inode); 830 tnode_free_safe(inode);
805 } 831 }
806 tnode_free(oldtnode); 832 tnode_free_safe(oldtnode);
807 return tn; 833 return tn;
808nomem: 834nomem:
809 { 835 {
@@ -885,7 +911,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
885 put_child(t, newBinNode, 1, right); 911 put_child(t, newBinNode, 1, right);
886 put_child(t, tn, i/2, resize(t, newBinNode)); 912 put_child(t, tn, i/2, resize(t, newBinNode));
887 } 913 }
888 tnode_free(oldtnode); 914 tnode_free_safe(oldtnode);
889 return tn; 915 return tn;
890nomem: 916nomem:
891 { 917 {
@@ -989,7 +1015,6 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
989 t_key cindex, key; 1015 t_key cindex, key;
990 struct tnode *tp; 1016 struct tnode *tp;
991 1017
992 preempt_disable();
993 key = tn->key; 1018 key = tn->key;
994 1019
995 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 1020 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
@@ -1001,16 +1026,18 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
1001 (struct node *)tn, wasfull); 1026 (struct node *)tn, wasfull);
1002 1027
1003 tp = node_parent((struct node *) tn); 1028 tp = node_parent((struct node *) tn);
1029 tnode_free_flush();
1004 if (!tp) 1030 if (!tp)
1005 break; 1031 break;
1006 tn = tp; 1032 tn = tp;
1007 } 1033 }
1008 1034
1009 /* Handle last (top) tnode */ 1035 /* Handle last (top) tnode */
1010 if (IS_TNODE(tn)) 1036 if (IS_TNODE(tn)) {
1011 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1037 tn = (struct tnode *)resize(t, (struct tnode *)tn);
1038 tnode_free_flush();
1039 }
1012 1040
1013 preempt_enable();
1014 return (struct node *)tn; 1041 return (struct node *)tn;
1015} 1042}
1016 1043