diff options
author | Jarek Poplawski <jarkao2@gmail.com> | 2009-06-15 05:31:29 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-06-15 05:31:29 -0400 |
commit | e0f7cb8c8cc6cccce28d2ce39ad8c60d23c3799f (patch) | |
tree | 204963b92fd4cdd8a73cd133ef36360c0d47014f /net/ipv4 | |
parent | 3c4bdc4bd4af791a72147b6ebc29553808f53cea (diff) |
ipv4: Fix fib_trie rebalancing
While doing trie_rebalance(): resize(), inflate(), halve() RCU free
tnodes before updating their parents. It depends on RCU delaying the
real destruction, but if RCU readers start after call_rcu() and before
parent update they could access freed memory.
It is currently prevented with preempt_disable() on the update side,
but it's not safe, except maybe classic RCU, plus it conflicts with
memory allocations with GFP_KERNEL flag used from these functions.
This patch explicitly delays freeing of tnodes by adding them to the
list, which is flushed after the update is finished.
Reported-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/fib_trie.c | 47 |
1 files changed, 37 insertions, 10 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 538d2a9a5115..d1a39b1277d6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -123,6 +123,7 @@ struct tnode { | |||
123 | union { | 123 | union { |
124 | struct rcu_head rcu; | 124 | struct rcu_head rcu; |
125 | struct work_struct work; | 125 | struct work_struct work; |
126 | struct tnode *tnode_free; | ||
126 | }; | 127 | }; |
127 | struct node *child[0]; | 128 | struct node *child[0]; |
128 | }; | 129 | }; |
@@ -161,6 +162,8 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
161 | static struct node *resize(struct trie *t, struct tnode *tn); | 162 | static struct node *resize(struct trie *t, struct tnode *tn); |
162 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 163 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
163 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 164 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
165 | /* tnodes to free after resize(); protected by RTNL */ | ||
166 | static struct tnode *tnode_free_head; | ||
164 | 167 | ||
165 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 168 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
166 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 169 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
@@ -385,6 +388,29 @@ static inline void tnode_free(struct tnode *tn) | |||
385 | call_rcu(&tn->rcu, __tnode_free_rcu); | 388 | call_rcu(&tn->rcu, __tnode_free_rcu); |
386 | } | 389 | } |
387 | 390 | ||
391 | static void tnode_free_safe(struct tnode *tn) | ||
392 | { | ||
393 | BUG_ON(IS_LEAF(tn)); | ||
394 | |||
395 | if (node_parent((struct node *) tn)) { | ||
396 | tn->tnode_free = tnode_free_head; | ||
397 | tnode_free_head = tn; | ||
398 | } else { | ||
399 | tnode_free(tn); | ||
400 | } | ||
401 | } | ||
402 | |||
403 | static void tnode_free_flush(void) | ||
404 | { | ||
405 | struct tnode *tn; | ||
406 | |||
407 | while ((tn = tnode_free_head)) { | ||
408 | tnode_free_head = tn->tnode_free; | ||
409 | tn->tnode_free = NULL; | ||
410 | tnode_free(tn); | ||
411 | } | ||
412 | } | ||
413 | |||
388 | static struct leaf *leaf_new(void) | 414 | static struct leaf *leaf_new(void) |
389 | { | 415 | { |
390 | struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); | 416 | struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); |
@@ -495,7 +521,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
495 | 521 | ||
496 | /* No children */ | 522 | /* No children */ |
497 | if (tn->empty_children == tnode_child_length(tn)) { | 523 | if (tn->empty_children == tnode_child_length(tn)) { |
498 | tnode_free(tn); | 524 | tnode_free_safe(tn); |
499 | return NULL; | 525 | return NULL; |
500 | } | 526 | } |
501 | /* One child */ | 527 | /* One child */ |
@@ -509,7 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
509 | 535 | ||
510 | /* compress one level */ | 536 | /* compress one level */ |
511 | node_set_parent(n, NULL); | 537 | node_set_parent(n, NULL); |
512 | tnode_free(tn); | 538 | tnode_free_safe(tn); |
513 | return n; | 539 | return n; |
514 | } | 540 | } |
515 | /* | 541 | /* |
@@ -670,7 +696,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
670 | /* compress one level */ | 696 | /* compress one level */ |
671 | 697 | ||
672 | node_set_parent(n, NULL); | 698 | node_set_parent(n, NULL); |
673 | tnode_free(tn); | 699 | tnode_free_safe(tn); |
674 | return n; | 700 | return n; |
675 | } | 701 | } |
676 | 702 | ||
@@ -756,7 +782,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
756 | put_child(t, tn, 2*i, inode->child[0]); | 782 | put_child(t, tn, 2*i, inode->child[0]); |
757 | put_child(t, tn, 2*i+1, inode->child[1]); | 783 | put_child(t, tn, 2*i+1, inode->child[1]); |
758 | 784 | ||
759 | tnode_free(inode); | 785 | tnode_free_safe(inode); |
760 | continue; | 786 | continue; |
761 | } | 787 | } |
762 | 788 | ||
@@ -801,9 +827,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
801 | put_child(t, tn, 2*i, resize(t, left)); | 827 | put_child(t, tn, 2*i, resize(t, left)); |
802 | put_child(t, tn, 2*i+1, resize(t, right)); | 828 | put_child(t, tn, 2*i+1, resize(t, right)); |
803 | 829 | ||
804 | tnode_free(inode); | 830 | tnode_free_safe(inode); |
805 | } | 831 | } |
806 | tnode_free(oldtnode); | 832 | tnode_free_safe(oldtnode); |
807 | return tn; | 833 | return tn; |
808 | nomem: | 834 | nomem: |
809 | { | 835 | { |
@@ -885,7 +911,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
885 | put_child(t, newBinNode, 1, right); | 911 | put_child(t, newBinNode, 1, right); |
886 | put_child(t, tn, i/2, resize(t, newBinNode)); | 912 | put_child(t, tn, i/2, resize(t, newBinNode)); |
887 | } | 913 | } |
888 | tnode_free(oldtnode); | 914 | tnode_free_safe(oldtnode); |
889 | return tn; | 915 | return tn; |
890 | nomem: | 916 | nomem: |
891 | { | 917 | { |
@@ -989,7 +1015,6 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | |||
989 | t_key cindex, key; | 1015 | t_key cindex, key; |
990 | struct tnode *tp; | 1016 | struct tnode *tp; |
991 | 1017 | ||
992 | preempt_disable(); | ||
993 | key = tn->key; | 1018 | key = tn->key; |
994 | 1019 | ||
995 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 1020 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { |
@@ -1001,16 +1026,18 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | |||
1001 | (struct node *)tn, wasfull); | 1026 | (struct node *)tn, wasfull); |
1002 | 1027 | ||
1003 | tp = node_parent((struct node *) tn); | 1028 | tp = node_parent((struct node *) tn); |
1029 | tnode_free_flush(); | ||
1004 | if (!tp) | 1030 | if (!tp) |
1005 | break; | 1031 | break; |
1006 | tn = tp; | 1032 | tn = tp; |
1007 | } | 1033 | } |
1008 | 1034 | ||
1009 | /* Handle last (top) tnode */ | 1035 | /* Handle last (top) tnode */ |
1010 | if (IS_TNODE(tn)) | 1036 | if (IS_TNODE(tn)) { |
1011 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1037 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
1038 | tnode_free_flush(); | ||
1039 | } | ||
1012 | 1040 | ||
1013 | preempt_enable(); | ||
1014 | return (struct node *)tn; | 1041 | return (struct node *)tn; |
1015 | } | 1042 | } |
1016 | 1043 | ||