aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author Stephen Hemminger <shemminger@vyatta.com> 2009-04-29 01:36:33 -0400
committer David S. Miller <davem@davemloft.net> 2009-04-29 01:36:33 -0400
commit 942e4a2bd680c606af0211e64eb216be2e19bf61 (patch)
tree a83af49242d4a8d53aa0f3b5814eb17da72edc09 /include/linux
parent bf368e4e70cd4e0f880923c44e95a4273d725ab4 (diff)
netfilter: revised locking for x_tables
The x_tables are organized with a table structure and per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table, which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules, which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow table processing to proceed without the cache thrashing of a global reader lock, and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/netfilter/x_tables.h 73
1 files changed, 68 insertions, 5 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 7b1a652066c0..1b2e43502ef7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -354,9 +354,6 @@ struct xt_table
354 /* What hooks you will enter on */ 354 /* What hooks you will enter on */
355 unsigned int valid_hooks; 355 unsigned int valid_hooks;
356 356
357 /* Lock for the curtain */
358 struct mutex lock;
359
360 /* Man behind the curtain... */ 357 /* Man behind the curtain... */
361 struct xt_table_info *private; 358 struct xt_table_info *private;
362 359
@@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
434 431
435extern struct xt_table_info *xt_alloc_table_info(unsigned int size); 432extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
436extern void xt_free_table_info(struct xt_table_info *info); 433extern void xt_free_table_info(struct xt_table_info *info);
437extern void xt_table_entry_swap_rcu(struct xt_table_info *old, 434
438 struct xt_table_info *new); 435/*
436 * Per-CPU spinlock associated with per-cpu table entries, and
437 * with a counter for the "reading" side that allows a recursive
438 * reader to avoid taking the lock and deadlocking.
439 *
440 * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
441 * It needs to ensure that the rules are not being changed while the packet
442 * is being processed. In some cases, the read lock will be acquired
443 * twice on the same CPU; this is okay because of the count.
444 *
445 * "writing" is used when reading counters.
446 * During replace any readers that are using the old tables have to complete
447 * before freeing the old table. This is handled by the write locking
448 * necessary for reading the counters.
449 */
450struct xt_info_lock {
451 spinlock_t lock;
452 unsigned char readers;
453};
454DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
455
456/*
457 * Note: we need to ensure that preemption is disabled before acquiring
458 * the per-cpu-variable, so we do it as a two step process rather than
459 * using "spin_lock_bh()".
460 *
461 * We _also_ need to disable bottom half processing before updating our
462 * nesting count, to make sure that the only kind of re-entrancy is this
463 * code being called by itself: since the count+lock is not an atomic
464 * operation, we can allow no races.
465 *
466 * _Only_ that special combination of being per-cpu and never getting
467 * re-entered asynchronously means that the count is safe.
468 */
469static inline void xt_info_rdlock_bh(void)
470{
471 struct xt_info_lock *lock;
472
473 local_bh_disable();
474 lock = &__get_cpu_var(xt_info_locks);
475 if (!lock->readers++)
476 spin_lock(&lock->lock);
477}
478
479static inline void xt_info_rdunlock_bh(void)
480{
481 struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
482
483 if (!--lock->readers)
484 spin_unlock(&lock->lock);
485 local_bh_enable();
486}
487
488/*
489 * The "writer" side needs to get exclusive access to the lock,
490 * regardless of readers. This must be called with bottom half
491 * processing (and thus also preemption) disabled.
492 */
493static inline void xt_info_wrlock(unsigned int cpu)
494{
495 spin_lock(&per_cpu(xt_info_locks, cpu).lock);
496}
497
498static inline void xt_info_wrunlock(unsigned int cpu)
499{
500 spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
501}
439 502
440/* 503/*
441 * This helper is performance critical and must be inlined 504 * This helper is performance critical and must be inlined