Diffstat (limited to 'net/ipv4/fib_hash.c')
-rw-r--r--   net/ipv4/fib_hash.c   291
1 file changed, 166 insertions(+), 125 deletions(-)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 4ed7e0dea1bc..43e1c594ce8f 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -54,36 +54,37 @@ struct fib_node {
 	struct fib_alias fn_embedded_alias;
 };
 
-struct fn_zone {
-	struct fn_zone *fz_next;	/* Next not empty zone	*/
-	struct hlist_head *fz_hash;	/* Hash table pointer	*/
-	int fz_nent;			/* Number of entries	*/
+#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
 
-	int fz_divisor;			/* Hash divisor		*/
+struct fn_zone {
+	struct fn_zone __rcu *fz_next;	/* Next not empty zone	*/
+	struct hlist_head __rcu *fz_hash;	/* Hash table pointer	*/
+	seqlock_t fz_lock;
 	u32 fz_hashmask;		/* (fz_divisor - 1)	*/
-#define FZ_HASHMASK(fz)		((fz)->fz_hashmask)
 
-	int fz_order;			/* Zone order		*/
-	__be32 fz_mask;
+	u8 fz_order;			/* Zone order (0..32)	*/
+	u8 fz_revorder;			/* 32 - fz_order	*/
+	__be32 fz_mask;			/* inet_make_mask(order) */
 #define FZ_MASK(fz)		((fz)->fz_mask)
-};
 
-/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
- * can be cheaper than memory lookup, so that FZ_* macros are used.
- */
+	struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
+
+	int fz_nent;			/* Number of entries	*/
+	int fz_divisor;			/* Hash size (mask+1)	*/
+};
 
 struct fn_hash {
 	struct fn_zone *fn_zones[33];
-	struct fn_zone *fn_zone_list;
+	struct fn_zone __rcu *fn_zone_list;
 };
 
 static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
 {
-	u32 h = ntohl(key)>>(32 - fz->fz_order);
+	u32 h = ntohl(key) >> fz->fz_revorder;
 	h ^= (h>>20);
 	h ^= (h>>10);
 	h ^= (h>>5);
-	h &= FZ_HASHMASK(fz);
+	h &= fz->fz_hashmask;
 	return h;
 }
 
@@ -92,7 +93,6 @@ static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
 	return dst & FZ_MASK(fz);
 }
 
-static DEFINE_RWLOCK(fib_hash_lock);
 static unsigned int fib_hash_genid;
 
 #define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
@@ -101,12 +101,11 @@ static struct hlist_head *fz_hash_alloc(int divisor)
 {
 	unsigned long size = divisor * sizeof(struct hlist_head);
 
-	if (size <= PAGE_SIZE) {
+	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
-	} else {
+
 	return (struct hlist_head *)
 		__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
-	}
 }
 
 /* The fib hash lock must be held when this is called. */
@@ -121,12 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
 		struct fib_node *f;
 
 		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
-			struct hlist_head *new_head;
+			struct hlist_head __rcu *new_head;
 
-			hlist_del(&f->fn_hash);
+			hlist_del_rcu(&f->fn_hash);
 
 			new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
-			hlist_add_head(&f->fn_hash, new_head);
+			hlist_add_head_rcu(&f->fn_hash, new_head);
 		}
 	}
 }
@@ -147,14 +146,14 @@ static void fn_rehash_zone(struct fn_zone *fz)
 	int old_divisor, new_divisor;
 	u32 new_hashmask;
 
-	old_divisor = fz->fz_divisor;
+	new_divisor = old_divisor = fz->fz_divisor;
 
 	switch (old_divisor) {
-	case 16:
-		new_divisor = 256;
+	case EMBEDDED_HASH_SIZE:
+		new_divisor *= EMBEDDED_HASH_SIZE;
 		break;
-	case 256:
-		new_divisor = 1024;
+	case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
+		new_divisor *= (EMBEDDED_HASH_SIZE/2);
 		break;
 	default:
 		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
@@ -175,31 +174,55 @@ static void fn_rehash_zone(struct fn_zone *fz)
 	ht = fz_hash_alloc(new_divisor);
 
 	if (ht) {
-		write_lock_bh(&fib_hash_lock);
+		struct fn_zone nfz;
+
+		memcpy(&nfz, fz, sizeof(nfz));
+
+		write_seqlock_bh(&fz->fz_lock);
 		old_ht = fz->fz_hash;
-		fz->fz_hash = ht;
+		nfz.fz_hash = ht;
+		nfz.fz_hashmask = new_hashmask;
+		nfz.fz_divisor = new_divisor;
+		fn_rebuild_zone(&nfz, old_ht, old_divisor);
+		fib_hash_genid++;
+		rcu_assign_pointer(fz->fz_hash, ht);
 		fz->fz_hashmask = new_hashmask;
 		fz->fz_divisor = new_divisor;
-		fn_rebuild_zone(fz, old_ht, old_divisor);
-		fib_hash_genid++;
-		write_unlock_bh(&fib_hash_lock);
+		write_sequnlock_bh(&fz->fz_lock);
 
-		fz_hash_free(old_ht, old_divisor);
+		if (old_ht != fz->fz_embedded_hash) {
+			synchronize_rcu();
+			fz_hash_free(old_ht, old_divisor);
+		}
 	}
 }
 
-static inline void fn_free_node(struct fib_node * f)
+static void fn_free_node_rcu(struct rcu_head *head)
 {
+	struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
+
 	kmem_cache_free(fn_hash_kmem, f);
 }
 
+static inline void fn_free_node(struct fib_node *f)
+{
+	call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
+}
+
+static void fn_free_alias_rcu(struct rcu_head *head)
+{
+	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
+
+	kmem_cache_free(fn_alias_kmem, fa);
+}
+
 static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
 {
 	fib_release_info(fa->fa_info);
 	if (fa == &f->fn_embedded_alias)
 		fa->fa_info = NULL;
 	else
-		kmem_cache_free(fn_alias_kmem, fa);
+		call_rcu(&fa->rcu, fn_free_alias_rcu);
 }
 
 static struct fn_zone *
@@ -210,68 +233,71 @@ fn_new_zone(struct fn_hash *table, int z)
 	if (!fz)
 		return NULL;
 
-	if (z) {
-		fz->fz_divisor = 16;
-	} else {
-		fz->fz_divisor = 1;
-	}
-	fz->fz_hashmask = (fz->fz_divisor - 1);
-	fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
-	if (!fz->fz_hash) {
-		kfree(fz);
-		return NULL;
-	}
+	seqlock_init(&fz->fz_lock);
+	fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
+	fz->fz_hashmask = fz->fz_divisor - 1;
+	fz->fz_hash = fz->fz_embedded_hash;
 	fz->fz_order = z;
+	fz->fz_revorder = 32 - z;
 	fz->fz_mask = inet_make_mask(z);
 
 	/* Find the first not empty zone with more specific mask */
-	for (i=z+1; i<=32; i++)
+	for (i = z + 1; i <= 32; i++)
 		if (table->fn_zones[i])
 			break;
-	write_lock_bh(&fib_hash_lock);
-	if (i>32) {
+	if (i > 32) {
 		/* No more specific masks, we are the first. */
-		fz->fz_next = table->fn_zone_list;
-		table->fn_zone_list = fz;
+		rcu_assign_pointer(fz->fz_next,
+				   rtnl_dereference(table->fn_zone_list));
+		rcu_assign_pointer(table->fn_zone_list, fz);
 	} else {
-		fz->fz_next = table->fn_zones[i]->fz_next;
-		table->fn_zones[i]->fz_next = fz;
+		rcu_assign_pointer(fz->fz_next,
+				   rtnl_dereference(table->fn_zones[i]->fz_next));
+		rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
 	}
 	table->fn_zones[z] = fz;
 	fib_hash_genid++;
-	write_unlock_bh(&fib_hash_lock);
 	return fz;
 }
 
 int fib_table_lookup(struct fib_table *tb,
-		     const struct flowi *flp, struct fib_result *res)
+		     const struct flowi *flp, struct fib_result *res,
+		     int fib_flags)
 {
 	int err;
 	struct fn_zone *fz;
 	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
 
-	read_lock(&fib_hash_lock);
-	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
-		struct hlist_head *head;
+	rcu_read_lock();
+	for (fz = rcu_dereference(t->fn_zone_list);
+	     fz != NULL;
+	     fz = rcu_dereference(fz->fz_next)) {
+		struct hlist_head __rcu *head;
 		struct hlist_node *node;
 		struct fib_node *f;
-		__be32 k = fz_key(flp->fl4_dst, fz);
+		__be32 k;
+		unsigned int seq;
 
-		head = &fz->fz_hash[fn_hash(k, fz)];
-		hlist_for_each_entry(f, node, head, fn_hash) {
-			if (f->fn_key != k)
-				continue;
+		do {
+			seq = read_seqbegin(&fz->fz_lock);
+			k = fz_key(flp->fl4_dst, fz);
+
+			head = &fz->fz_hash[fn_hash(k, fz)];
+			hlist_for_each_entry_rcu(f, node, head, fn_hash) {
+				if (f->fn_key != k)
+					continue;
 
 				err = fib_semantic_match(&f->fn_alias,
 							 flp, res,
-							 fz->fz_order);
+							 fz->fz_order, fib_flags);
 				if (err <= 0)
 					goto out;
 			}
+		} while (read_seqretry(&fz->fz_lock, seq));
 	}
 	err = 1;
 out:
-	read_unlock(&fib_hash_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -293,11 +319,11 @@ void fib_table_select_default(struct fib_table *tb,
 	last_resort = NULL;
 	order = -1;
 
-	read_lock(&fib_hash_lock);
-	hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
 		struct fib_alias *fa;
 
-		list_for_each_entry(fa, &f->fn_alias, fa_list) {
+		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
 			struct fib_info *next_fi = fa->fa_info;
 
 			if (fa->fa_scope != res->scope ||
@@ -309,7 +335,8 @@ void fib_table_select_default(struct fib_table *tb,
 			if (!next_fi->fib_nh[0].nh_gw ||
 			    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
 				continue;
-			fa->fa_state |= FA_S_ACCESSED;
+
+			fib_alias_accessed(fa);
 
 			if (fi == NULL) {
 				if (next_fi != res->fi)
@@ -341,7 +368,7 @@ void fib_table_select_default(struct fib_table *tb,
 		fib_result_assign(res, last_resort);
 		tb->tb_default = last_idx;
 out:
-	read_unlock(&fib_hash_lock);
+	rcu_read_unlock();
 }
 
 /* Insert node F to FZ. */
@@ -349,7 +376,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
 {
 	struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
 
-	hlist_add_head(&f->fn_hash, head);
+	hlist_add_head_rcu(&f->fn_hash, head);
 }
 
 /* Return the node in FZ matching KEY. */
@@ -359,7 +386,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 	struct hlist_node *node;
 	struct fib_node *f;
 
-	hlist_for_each_entry(f, node, head, fn_hash) {
+	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
 		if (f->fn_key == key)
 			return f;
 	}
@@ -367,6 +394,17 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 	return NULL;
 }
 
+
+static struct fib_alias *fib_fast_alloc(struct fib_node *f)
+{
+	struct fib_alias *fa = &f->fn_embedded_alias;
+
+	if (fa->fa_info != NULL)
+		fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
+	return fa;
+}
+
+/* Caller must hold RTNL. */
 int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
@@ -451,7 +489,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	}
 
 	if (cfg->fc_nlflags & NLM_F_REPLACE) {
-		struct fib_info *fi_drop;
 		u8 state;
 
 		fa = fa_first;
@@ -460,21 +497,25 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			err = 0;
 			goto out;
 		}
-		write_lock_bh(&fib_hash_lock);
-		fi_drop = fa->fa_info;
-		fa->fa_info = fi;
-		fa->fa_type = cfg->fc_type;
-		fa->fa_scope = cfg->fc_scope;
+		err = -ENOBUFS;
+		new_fa = fib_fast_alloc(f);
+		if (new_fa == NULL)
+			goto out;
+
+		new_fa->fa_tos = fa->fa_tos;
+		new_fa->fa_info = fi;
+		new_fa->fa_type = cfg->fc_type;
+		new_fa->fa_scope = cfg->fc_scope;
 		state = fa->fa_state;
-		fa->fa_state &= ~FA_S_ACCESSED;
+		new_fa->fa_state = state & ~FA_S_ACCESSED;
 		fib_hash_genid++;
-		write_unlock_bh(&fib_hash_lock);
+		list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 
-		fib_release_info(fi_drop);
+		fn_free_alias(fa, f);
 		if (state & FA_S_ACCESSED)
 			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-		rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
-			  &cfg->fc_nlinfo, NLM_F_REPLACE);
+		rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
+			  tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 		return 0;
 	}
 
@@ -506,12 +547,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		f = new_f;
 	}
 
-	new_fa = &f->fn_embedded_alias;
-	if (new_fa->fa_info != NULL) {
-		new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
-		if (new_fa == NULL)
-			goto out;
-	}
+	new_fa = fib_fast_alloc(f);
+	if (new_fa == NULL)
+		goto out;
+
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
@@ -522,13 +561,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	 * Insert new entry to the list.
 	 */
 
-	write_lock_bh(&fib_hash_lock);
 	if (new_f)
 		fib_insert_node(fz, new_f);
-	list_add_tail(&new_fa->fa_list,
+	list_add_tail_rcu(&new_fa->fa_list,
 		 (fa ? &fa->fa_list : &f->fn_alias));
 	fib_hash_genid++;
-	write_unlock_bh(&fib_hash_lock);
 
 	if (new_f)
 		fz->fz_nent++;
@@ -603,14 +640,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 				  tb->tb_id, &cfg->fc_nlinfo, 0);
 
 		kill_fn = 0;
-		write_lock_bh(&fib_hash_lock);
-		list_del(&fa->fa_list);
+		list_del_rcu(&fa->fa_list);
 		if (list_empty(&f->fn_alias)) {
-			hlist_del(&f->fn_hash);
+			hlist_del_rcu(&f->fn_hash);
 			kill_fn = 1;
 		}
 		fib_hash_genid++;
-		write_unlock_bh(&fib_hash_lock);
 
 		if (fa->fa_state & FA_S_ACCESSED)
 			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
@@ -641,14 +676,12 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
 			struct fib_info *fi = fa->fa_info;
 
 			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
-				write_lock_bh(&fib_hash_lock);
-				list_del(&fa->fa_list);
+				list_del_rcu(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
-					hlist_del(&f->fn_hash);
+					hlist_del_rcu(&f->fn_hash);
 					kill_f = 1;
 				}
 				fib_hash_genid++;
-				write_unlock_bh(&fib_hash_lock);
 
 				fn_free_alias(fa, f);
 				found++;
@@ -662,13 +695,16 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
 	return found;
 }
 
+/* caller must hold RTNL. */
 int fib_table_flush(struct fib_table *tb)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fn_zone *fz;
 	int found = 0;
 
-	for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
+	for (fz = rtnl_dereference(table->fn_zone_list);
+	     fz != NULL;
+	     fz = rtnl_dereference(fz->fz_next)) {
 		int i;
 
 		for (i = fz->fz_divisor - 1; i >= 0; i--)
@@ -690,10 +726,10 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 
 	s_i = cb->args[4];
 	i = 0;
-	hlist_for_each_entry(f, node, head, fn_hash) {
+	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
 		struct fib_alias *fa;
 
-		list_for_each_entry(fa, &f->fn_alias, fa_list) {
+		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
 			if (i < s_i)
 				goto next;
 
@@ -711,7 +747,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 				cb->args[4] = i;
 				return -1;
 			}
-	next:
+next:
 			i++;
 		}
 	}
@@ -746,23 +782,26 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 		   struct netlink_callback *cb)
 {
-	int m, s_m;
+	int m = 0, s_m;
 	struct fn_zone *fz;
 	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
 
 	s_m = cb->args[2];
-	read_lock(&fib_hash_lock);
-	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
-		if (m < s_m) continue;
+	rcu_read_lock();
+	for (fz = rcu_dereference(table->fn_zone_list);
+	     fz != NULL;
+	     fz = rcu_dereference(fz->fz_next), m++) {
+		if (m < s_m)
+			continue;
 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
 			cb->args[2] = m;
-			read_unlock(&fib_hash_lock);
+			rcu_read_unlock();
 			return -1;
 		}
 		memset(&cb->args[3], 0,
 		       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 	}
-	read_unlock(&fib_hash_lock);
+	rcu_read_unlock();
 	cb->args[2] = m;
 	return skb->len;
 }
@@ -825,8 +864,9 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
 	iter->genid = fib_hash_genid;
 	iter->valid = 1;
 
-	for (iter->zone = table->fn_zone_list; iter->zone;
-	     iter->zone = iter->zone->fz_next) {
+	for (iter->zone = rcu_dereference(table->fn_zone_list);
+	     iter->zone != NULL;
+	     iter->zone = rcu_dereference(iter->zone->fz_next)) {
 		int maxslot;
 
 		if (!iter->zone->fz_nent)
@@ -911,7 +951,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
 		}
 	}
 
-	iter->zone = iter->zone->fz_next;
+	iter->zone = rcu_dereference(iter->zone->fz_next);
 
 	if (!iter->zone)
 		goto out;
@@ -950,11 +990,11 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(fib_hash_lock)
+	__acquires(RCU)
 {
 	void *v = NULL;
 
-	read_lock(&fib_hash_lock);
+	rcu_read_lock();
 	if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 	return v;
@@ -967,15 +1007,16 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void fib_seq_stop(struct seq_file *seq, void *v)
-	__releases(fib_hash_lock)
+	__releases(RCU)
 {
-	read_unlock(&fib_hash_lock);
+	rcu_read_unlock();
 }
 
 static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
 {
 	static const unsigned type2flags[RTN_MAX + 1] = {
-		[7] = RTF_REJECT, [8] = RTF_REJECT,
+		[7] = RTF_REJECT,
+		[8] = RTF_REJECT,
 	};
 	unsigned flags = type2flags[type];
 
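
Editor's note: the core reader-side pattern introduced in fib_table_lookup() above is an RCU bucket walk wrapped in a per-zone seqlock retry loop, so a rehash never forces readers to take a lock. The kernel-style sketch below illustrates only that pattern; the types and names (struct zone, struct entry, zone_lookup) are made up for illustration and are not part of fib_hash.c.

/*
 * Illustrative sketch (not from the patch): lockless lookup that survives
 * a concurrent table rehash.  Writers rebuild the table under the seqlock
 * and publish it with rcu_assign_pointer(); readers only retry on a miss.
 */
#include <linux/types.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/seqlock.h>
#include <linux/rcupdate.h>

struct entry {
	u32 key;
	struct hlist_node node;		/* linked into zone->hash[] under RTNL */
};

struct zone {
	seqlock_t lock;			/* write-held only while rehashing */
	struct hlist_head __rcu *hash;	/* published with rcu_assign_pointer() */
	u32 hashmask;			/* bucket count - 1 */
};

static struct entry *zone_lookup(struct zone *z, u32 key)
{
	struct entry *e;
	unsigned int seq;

	rcu_read_lock();
	do {
		struct hlist_head *head;
		struct hlist_node *n;

		seq = read_seqbegin(&z->lock);
		head = rcu_dereference(z->hash) +
		       (hash_32(key, 32) & z->hashmask);
		hlist_for_each_entry_rcu(e, n, head, node) {
			if (e->key == key)
				goto found;	/* a hit is valid even mid-rehash */
		}
		e = NULL;
		/*
		 * A miss must be retried: a concurrent rehash may have moved
		 * the entry into a bucket this walk already passed.
		 */
	} while (read_seqretry(&z->lock, seq));
found:
	rcu_read_unlock();
	return e;
}

The same reasoning explains why the patch can return from fib_semantic_match() without re-checking the sequence count: a positive match points at a still-valid, RCU-protected object, while only a negative result can be a false miss caused by the rehash.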