aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Alexander Duyck <alexander.h.duyck@redhat.com> 2015-03-04 18:02:44 -0500
committer: David S. Miller <davem@davemloft.net> 2015-03-04 23:35:18 -0500
commit: a7e53531234dc206bb75abb5305a72665dd4d75d (patch)
tree: 015a31ddf3a8d54491a04549adf3e3471dbb8943
parent: 41b489fd6ce03e96e90fcffdb69b168065ae2e40 (diff)
fib_trie: Make fib_table rcu safe
The fib_table was wrapped in several places with an rcu_read_lock/rcu_read_unlock; however, after looking over the code I found several spots where the tables were being accessed as just standard pointers without any protections. This change fixes that so that all of the proper protections are in place when accessing the table, in order to take RCU replacement or removal of the table into account.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip_fib.h 70
-rw-r--r-- include/net/netns/ipv4.h 7
-rw-r--r-- net/ipv4/fib_frontend.c 52
-rw-r--r-- net/ipv4/fib_trie.c 21
4 files changed, 98 insertions, 52 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cba4b7c32935..825cb2800908 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -185,6 +185,7 @@ struct fib_table {
185 u32 tb_id; 185 u32 tb_id;
186 int tb_default; 186 int tb_default;
187 int tb_num_default; 187 int tb_num_default;
188 struct rcu_head rcu;
188 unsigned long tb_data[0]; 189 unsigned long tb_data[0];
189}; 190};
190 191
@@ -206,12 +207,16 @@ void fib_free_table(struct fib_table *tb);
206 207
207static inline struct fib_table *fib_get_table(struct net *net, u32 id) 208static inline struct fib_table *fib_get_table(struct net *net, u32 id)
208{ 209{
210 struct hlist_node *tb_hlist;
209 struct hlist_head *ptr; 211 struct hlist_head *ptr;
210 212
211 ptr = id == RT_TABLE_LOCAL ? 213 ptr = id == RT_TABLE_LOCAL ?
212 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] : 214 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] :
213 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]; 215 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX];
214 return hlist_entry(ptr->first, struct fib_table, tb_hlist); 216
217 tb_hlist = rcu_dereference_rtnl(hlist_first_rcu(ptr));
218
219 return hlist_entry(tb_hlist, struct fib_table, tb_hlist);
215} 220}
216 221
217static inline struct fib_table *fib_new_table(struct net *net, u32 id) 222static inline struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -222,15 +227,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id)
222static inline int fib_lookup(struct net *net, const struct flowi4 *flp, 227static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
223 struct fib_result *res) 228 struct fib_result *res)
224{ 229{
225 int err = -ENETUNREACH; 230 struct fib_table *tb;
231 int err;
226 232
227 rcu_read_lock(); 233 rcu_read_lock();
228 234
229 if (!fib_table_lookup(fib_get_table(net, RT_TABLE_LOCAL), flp, res, 235 for (err = 0; !err; err = -ENETUNREACH) {
230 FIB_LOOKUP_NOREF) || 236 tb = fib_get_table(net, RT_TABLE_LOCAL);
231 !fib_table_lookup(fib_get_table(net, RT_TABLE_MAIN), flp, res, 237 if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
232 FIB_LOOKUP_NOREF)) 238 break;
233 err = 0; 239 tb = fib_get_table(net, RT_TABLE_MAIN);
240 if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
241 break;
242 }
234 243
235 rcu_read_unlock(); 244 rcu_read_unlock();
236 245
@@ -249,28 +258,33 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res);
249static inline int fib_lookup(struct net *net, struct flowi4 *flp, 258static inline int fib_lookup(struct net *net, struct flowi4 *flp,
250 struct fib_result *res) 259 struct fib_result *res)
251{ 260{
252 if (!net->ipv4.fib_has_custom_rules) { 261 struct fib_table *tb;
253 int err = -ENETUNREACH; 262 int err;
254 263
255 rcu_read_lock(); 264 if (net->ipv4.fib_has_custom_rules)
256 265 return __fib_lookup(net, flp, res);
257 res->tclassid = 0; 266
258 if ((net->ipv4.fib_local && 267 rcu_read_lock();
259 !fib_table_lookup(net->ipv4.fib_local, flp, res, 268
260 FIB_LOOKUP_NOREF)) || 269 res->tclassid = 0;
261 (net->ipv4.fib_main && 270
262 !fib_table_lookup(net->ipv4.fib_main, flp, res, 271 for (err = 0; !err; err = -ENETUNREACH) {
263 FIB_LOOKUP_NOREF)) || 272 tb = rcu_dereference_rtnl(net->ipv4.fib_local);
264 (net->ipv4.fib_default && 273 if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
265 !fib_table_lookup(net->ipv4.fib_default, flp, res, 274 break;
266 FIB_LOOKUP_NOREF))) 275
267 err = 0; 276 tb = rcu_dereference_rtnl(net->ipv4.fib_main);
268 277 if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
269 rcu_read_unlock(); 278 break;
270 279
271 return err; 280 tb = rcu_dereference_rtnl(net->ipv4.fib_default);
281 if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
282 break;
272 } 283 }
273 return __fib_lookup(net, flp, res); 284
285 rcu_read_unlock();
286
287 return err;
274} 288}
275 289
276#endif /* CONFIG_IP_MULTIPLE_TABLES */ 290#endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1b26c6c3fd7c..db1db158a00e 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -7,6 +7,7 @@
7 7
8#include <linux/uidgid.h> 8#include <linux/uidgid.h>
9#include <net/inet_frag.h> 9#include <net/inet_frag.h>
10#include <linux/rcupdate.h>
10 11
11struct tcpm_hash_bucket; 12struct tcpm_hash_bucket;
12struct ctl_table_header; 13struct ctl_table_header;
@@ -38,9 +39,9 @@ struct netns_ipv4 {
38#ifdef CONFIG_IP_MULTIPLE_TABLES 39#ifdef CONFIG_IP_MULTIPLE_TABLES
39 struct fib_rules_ops *rules_ops; 40 struct fib_rules_ops *rules_ops;
40 bool fib_has_custom_rules; 41 bool fib_has_custom_rules;
41 struct fib_table *fib_local; 42 struct fib_table __rcu *fib_local;
42 struct fib_table *fib_main; 43 struct fib_table __rcu *fib_main;
43 struct fib_table *fib_default; 44 struct fib_table __rcu *fib_default;
44#endif 45#endif
45#ifdef CONFIG_IP_ROUTE_CLASSID 46#ifdef CONFIG_IP_ROUTE_CLASSID
46 int fib_num_tclassid_users; 47 int fib_num_tclassid_users;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71dd6a9e..220c4b4af4cf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -89,17 +89,14 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
89 89
90 switch (id) { 90 switch (id) {
91 case RT_TABLE_LOCAL: 91 case RT_TABLE_LOCAL:
92 net->ipv4.fib_local = tb; 92 rcu_assign_pointer(net->ipv4.fib_local, tb);
93 break; 93 break;
94
95 case RT_TABLE_MAIN: 94 case RT_TABLE_MAIN:
96 net->ipv4.fib_main = tb; 95 rcu_assign_pointer(net->ipv4.fib_main, tb);
97 break; 96 break;
98
99 case RT_TABLE_DEFAULT: 97 case RT_TABLE_DEFAULT:
100 net->ipv4.fib_default = tb; 98 rcu_assign_pointer(net->ipv4.fib_default, tb);
101 break; 99 break;
102
103 default: 100 default:
104 break; 101 break;
105 } 102 }
@@ -132,13 +129,14 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
132static void fib_flush(struct net *net) 129static void fib_flush(struct net *net)
133{ 130{
134 int flushed = 0; 131 int flushed = 0;
135 struct fib_table *tb;
136 struct hlist_head *head;
137 unsigned int h; 132 unsigned int h;
138 133
139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 134 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140 head = &net->ipv4.fib_table_hash[h]; 135 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, head, tb_hlist) 136 struct hlist_node *tmp;
137 struct fib_table *tb;
138
139 hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
142 flushed += fib_table_flush(tb); 140 flushed += fib_table_flush(tb);
143 } 141 }
144 142
@@ -665,10 +663,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
665 s_h = cb->args[0]; 663 s_h = cb->args[0];
666 s_e = cb->args[1]; 664 s_e = cb->args[1];
667 665
666 rcu_read_lock();
667
668 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 668 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
669 e = 0; 669 e = 0;
670 head = &net->ipv4.fib_table_hash[h]; 670 head = &net->ipv4.fib_table_hash[h];
671 hlist_for_each_entry(tb, head, tb_hlist) { 671 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
672 if (e < s_e) 672 if (e < s_e)
673 goto next; 673 goto next;
674 if (dumped) 674 if (dumped)
@@ -682,6 +682,8 @@ next:
682 } 682 }
683 } 683 }
684out: 684out:
685 rcu_read_unlock();
686
685 cb->args[1] = e; 687 cb->args[1] = e;
686 cb->args[0] = h; 688 cb->args[0] = h;
687 689
@@ -1117,14 +1119,34 @@ static void ip_fib_net_exit(struct net *net)
1117 1119
1118 rtnl_lock(); 1120 rtnl_lock();
1119 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1121 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1120 struct fib_table *tb; 1122 struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1121 struct hlist_head *head;
1122 struct hlist_node *tmp; 1123 struct hlist_node *tmp;
1124 struct fib_table *tb;
1125
1126 /* this is done in two passes as flushing the table could
1127 * cause it to be reallocated in order to accommodate new
1128 * tnodes at the root as the table shrinks.
1129 */
1130 hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
1131 fib_table_flush(tb);
1123 1132
1124 head = &net->ipv4.fib_table_hash[i];
1125 hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { 1133 hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
1134#ifdef CONFIG_IP_MULTIPLE_TABLES
1135 switch (tb->tb_id) {
1136 case RT_TABLE_LOCAL:
1137 RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
1138 break;
1139 case RT_TABLE_MAIN:
1140 RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1141 break;
1142 case RT_TABLE_DEFAULT:
1143 RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1144 break;
1145 default:
1146 break;
1147 }
1148#endif
1126 hlist_del(&tb->tb_hlist); 1149 hlist_del(&tb->tb_hlist);
1127 fib_table_flush(tb);
1128 fib_free_table(tb); 1150 fib_free_table(tb);
1129 } 1151 }
1130 } 1152 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2233ebf2aae8..3642b17c8726 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -193,6 +193,13 @@ static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn,
193 return rcu_dereference_rtnl(tn->tnode[i]); 193 return rcu_dereference_rtnl(tn->tnode[i]);
194} 194}
195 195
196static inline struct fib_table *trie_get_table(struct trie *t)
197{
198 unsigned long *tb_data = (unsigned long *)t;
199
200 return container_of(tb_data, struct fib_table, tb_data[0]);
201}
202
196/* To understand this stuff, an understanding of keys and all their bits is 203/* To understand this stuff, an understanding of keys and all their bits is
197 * necessary. Every node in the trie has a key associated with it, but not 204 * necessary. Every node in the trie has a key associated with it, but not
198 * all of the bits in that key are significant. 205 * all of the bits in that key are significant.
@@ -1593,8 +1600,9 @@ flush_complete:
1593 return found; 1600 return found;
1594} 1601}
1595 1602
1596void fib_free_table(struct fib_table *tb) 1603static void __trie_free_rcu(struct rcu_head *head)
1597{ 1604{
1605 struct fib_table *tb = container_of(head, struct fib_table, rcu);
1598#ifdef CONFIG_IP_FIB_TRIE_STATS 1606#ifdef CONFIG_IP_FIB_TRIE_STATS
1599 struct trie *t = (struct trie *)tb->tb_data; 1607 struct trie *t = (struct trie *)tb->tb_data;
1600 1608
@@ -1603,6 +1611,11 @@ void fib_free_table(struct fib_table *tb)
1603 kfree(tb); 1611 kfree(tb);
1604} 1612}
1605 1613
1614void fib_free_table(struct fib_table *tb)
1615{
1616 call_rcu(&tb->rcu, __trie_free_rcu);
1617}
1618
1606static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, 1619static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
1607 struct sk_buff *skb, struct netlink_callback *cb) 1620 struct sk_buff *skb, struct netlink_callback *cb)
1608{ 1621{
@@ -1639,6 +1652,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
1639 return skb->len; 1652 return skb->len;
1640} 1653}
1641 1654
1655/* rcu_read_lock needs to be hold by caller from readside */
1642int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1656int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1643 struct netlink_callback *cb) 1657 struct netlink_callback *cb)
1644{ 1658{
@@ -1650,15 +1664,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1650 int count = cb->args[2]; 1664 int count = cb->args[2];
1651 t_key key = cb->args[3]; 1665 t_key key = cb->args[3];
1652 1666
1653 rcu_read_lock();
1654
1655 tp = rcu_dereference_rtnl(t->trie); 1667 tp = rcu_dereference_rtnl(t->trie);
1656 1668
1657 while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 1669 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1658 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 1670 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
1659 cb->args[3] = key; 1671 cb->args[3] = key;
1660 cb->args[2] = count; 1672 cb->args[2] = count;
1661 rcu_read_unlock();
1662 return -1; 1673 return -1;
1663 } 1674 }
1664 1675
@@ -1673,8 +1684,6 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1673 break; 1684 break;
1674 } 1685 }
1675 1686
1676 rcu_read_unlock();
1677
1678 cb->args[3] = key; 1687 cb->args[3] = key;
1679 cb->args[2] = count; 1688 cb->args[2] = count;
1680 1689