author     Stephen Hemminger <shemminger@vyatta.com>   2009-04-29 01:36:33 -0400
committer  David S. Miller <davem@davemloft.net>       2009-04-29 01:36:33 -0400
commit     942e4a2bd680c606af0211e64eb216be2e19bf61
tree       a83af49242d4a8d53aa0f3b5814eb17da72edc09 /include
parent     bf368e4e70cd4e0f880923c44e95a4273d725ab4
netfilter: revised locking for x_tables
The x_tables are organized with a table structure and per-cpu copies of
the counters and rules. On older kernels there was a reader/writer lock
per table, which was a performance bottleneck. In 2.6.30-rc this was
converted to protect the counters/rules with RCU, which solved the
performance problem for do_table (packet processing) but made replacing
rules much slower because of the necessary RCU grace period.
This version uses a per-cpu set of spinlocks and counters, which lets
table processing proceed without the cache thrashing of a global reader
lock while keeping the same performance for table updates.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
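For reference, here is a minimal sketch of how the reader-side helpers introduced below are meant to be used around per-cpu rule traversal in the packet path. Only the xt_info_rdlock_bh()/xt_info_rdunlock_bh() calls come from this patch; the function name and the traversal step are hypothetical placeholders.

#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>

/* Hypothetical packet-path function; only the xt_info_* calls are from this patch. */
static unsigned int example_do_table(struct sk_buff *skb, struct xt_table *table)
{
	const struct xt_table_info *private;
	unsigned int verdict = NF_ACCEPT;

	xt_info_rdlock_bh();		/* disable BH and take this CPU's lock (recursion-safe) */
	private = table->private;	/* the rule blob stays valid until all readers finish */
	/* ... walk the per-cpu rule entries in private->entries[] and set verdict ... */
	xt_info_rdunlock_bh();		/* release once the nesting count drops to zero */

	return verdict;
}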
Diffstat (limited to 'include')
-rw-r--r--   include/linux/netfilter/x_tables.h   73
1 file changed, 68 insertions(+), 5 deletions(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 7b1a652066c0..1b2e43502ef7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -354,9 +354,6 @@ struct xt_table
 	/* What hooks you will enter on */
 	unsigned int valid_hooks;
 
-	/* Lock for the curtain */
-	struct mutex lock;
-
 	/* Man behind the curtain... */
 	struct xt_table_info *private;
 
@@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
-extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
-				    struct xt_table_info *new);
+
+/*
+ * Per-CPU spinlock associated with per-cpu table entries, and
+ * with a counter for the "reading" side that allows a recursive
+ * reader to avoid taking the lock and deadlocking.
+ *
+ * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
+ * It needs to ensure that the rules are not being changed while the packet
+ * is being processed. In some cases, the read lock will be acquired
+ * twice on the same CPU; this is okay because of the count.
+ *
+ * "writing" is used when reading counters.
+ *  During replace any readers that are using the old tables have to complete
+ *  before freeing the old table. This is handled by the write locking
+ *  necessary for reading the counters.
+ */
+struct xt_info_lock {
+	spinlock_t lock;
+	unsigned char readers;
+};
+DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
+
+/*
+ * Note: we need to ensure that preemption is disabled before acquiring
+ * the per-cpu-variable, so we do it as a two step process rather than
+ * using "spin_lock_bh()".
+ *
+ * We _also_ need to disable bottom half processing before updating our
+ * nesting count, to make sure that the only kind of re-entrancy is this
+ * code being called by itself: since the count+lock is not an atomic
+ * operation, we can allow no races.
+ *
+ * _Only_ that special combination of being per-cpu and never getting
+ * re-entered asynchronously means that the count is safe.
+ */
+static inline void xt_info_rdlock_bh(void)
+{
+	struct xt_info_lock *lock;
+
+	local_bh_disable();
+	lock = &__get_cpu_var(xt_info_locks);
+	if (!lock->readers++)
+		spin_lock(&lock->lock);
+}
+
+static inline void xt_info_rdunlock_bh(void)
+{
+	struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
+
+	if (!--lock->readers)
+		spin_unlock(&lock->lock);
+	local_bh_enable();
+}
+
+/*
+ * The "writer" side needs to get exclusive access to the lock,
+ * regardless of readers.  This must be called with bottom half
+ * processing (and thus also preemption) disabled.
+ */
+static inline void xt_info_wrlock(unsigned int cpu)
+{
+	spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+}
+
+static inline void xt_info_wrunlock(unsigned int cpu)
+{
+	spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+}
 
 /*
  * This helper is performance critical and must be inlined
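As the comments above describe, the "writer" side is what counter reads and rule replacement rely on to wait out in-flight readers. A hedged sketch of that usage pattern follows: bottom halves are disabled, then each CPU's lock is taken in turn before that CPU's private entries are visited. The function name and the counter-summing step are illustrative placeholders, not code from this patch.

#include <linux/interrupt.h>
#include <linux/cpumask.h>
#include <linux/netfilter/x_tables.h>

/* Illustrative only: visit every CPU's private rule copy under its xt_info lock. */
static void example_get_counters(const struct xt_table_info *t,
				 struct xt_counters *counters)
{
	unsigned int cpu;

	local_bh_disable();			/* xt_info_wrlock() requires BH (and preemption) off */
	for_each_possible_cpu(cpu) {
		xt_info_wrlock(cpu);		/* excludes readers running on that CPU */
		/* ... add up the per-cpu entries at t->entries[cpu] into counters[] ... */
		xt_info_wrunlock(cpu);
	}
	local_bh_enable();
}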