path: root/net/ipv6
author	Stephen Hemminger <shemminger@vyatta.com>	2009-02-20 04:35:32 -0500
committer	Patrick McHardy <kaber@trash.net>	2009-02-20 04:35:32 -0500
commit	784544739a25c30637397ace5489eeb6e15d7d49 (patch)
tree	c48bbf30f3eb753858de9a03b74e81925cf39018 /net/ipv6
parent	323dbf96382f057d035afce0237f08e18571ac1d (diff)
netfilter: iptables: lock free counters
The reader/writer lock in ip_tables is acquired in the critical path of
processing packets and is one of the reasons just loading iptables can cause
a 20% performance loss. The rwlock serves two functions:

1) it prevents changes to table state (xt_replace) while the table is in use.
   This is now handled by doing rcu on the xt_table. When a table is
   replaced, the new table(s) are put in and the old table(s) are freed
   after an RCU grace period.

2) it provides synchronization when accessing the counter values.
   This is now handled by swapping in new table_info entries for each cpu,
   then summing the old values and putting the result back onto one cpu.
   On a busy system it may cause sampling to occur at different times on
   each cpu, but no packet/byte counts are lost in the process.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Successfully tested on my dual quad core machine too, but iptables only
(no ipv6 here). BTW, my new "tbench 8" result is 2450 MB/s (it was
2150 MB/s not so long ago).

Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
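The read-side/replace-side split described in point 1 is the standard RCU
publish-and-retire pattern. Below is a minimal sketch with invented demo_*
names, not the actual ip6t_do_table()/xt_replace_table() code; only
rcu_read_lock(), rcu_dereference(), rcu_assign_pointer() and
synchronize_net() are the real kernel primitives:

#include <linux/rcupdate.h>
#include <linux/mutex.h>

struct demo_info;			/* stands in for struct xt_table_info */

struct demo_table {
	struct demo_info *private;	/* packet path reads this via RCU */
	struct mutex lock;		/* serializes writers only */
};

/* Packet path: no rwlock, just an RCU read-side critical section. */
static void demo_do_table(struct demo_table *t)
{
	struct demo_info *info;

	rcu_read_lock();
	info = rcu_dereference(t->private);
	/* ... traverse the rules in info ... */
	rcu_read_unlock();
}

/* Replace path: publish the new table, wait out readers, free the old. */
static struct demo_info *demo_replace(struct demo_table *t,
				      struct demo_info *newinfo)
{
	struct demo_info *oldinfo;

	mutex_lock(&t->lock);
	oldinfo = t->private;
	rcu_assign_pointer(t->private, newinfo);
	mutex_unlock(&t->lock);

	synchronize_net();	/* grace period: packet-path readers are done */
	return oldinfo;		/* now safe to sum counters from and free */
}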
Diffstat (limited to 'net/ipv6')
-rw-r--r--	net/ipv6/netfilter/ip6_tables.c	119
1 file changed, 84 insertions(+), 35 deletions(-)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d64594b6c061..34af7bb8df5f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -382,10 +382,12 @@ ip6t_do_table(struct sk_buff *skb,
 	mtpar.family = tgpar.family = NFPROTO_IPV6;
 	tgpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
@@ -483,7 +485,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-	read_unlock_bh(&table->lock);
+	rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -964,11 +966,64 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->entries[cpu],
+			   t->size,
+			   add_counter_to_entry,
+			   counters,
+			   &i);
+	local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct ip6t_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				   zero_entry_counter, NULL);
+	}
+}
+
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -977,14 +1032,28 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	xt_free_table_info(info);
 
-	return counters;
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1351,28 +1420,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
 		int compat)
@@ -1433,13 +1480,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1448,8 +1496,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
+	preempt_enable();
 unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
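
The counter scheme in alloc_counters()/put_counters() above can be
sanity-checked with a toy userspace model: swap in zeroed per-cpu counters,
sum the detached set, then fold the sum back onto one cpu. Everything below
is invented for illustration (NCPUS, struct cnt); the kernel counterparts
are clone_counters(), xt_table_entry_swap_rcu(), get_counters() and
put_counters():

#include <stdio.h>
#include <string.h>

#define NCPUS 4

struct cnt { unsigned long pcnt, bcnt; };

int main(void)
{
	struct cnt live[NCPUS] = { {3, 300}, {1, 100}, {5, 500}, {2, 200} };
	struct cnt old[NCPUS];
	struct cnt snap = { 0, 0 };
	int cpu;

	/* 1. swap: detach the old per-cpu counters and install zeroed
	 *    ones; from here on the packet path increments the zeroes */
	memcpy(old, live, sizeof(old));
	memset(live, 0, sizeof(live));

	/* 2. sum the detached per-cpu values into one snapshot */
	for (cpu = 0; cpu < NCPUS; cpu++) {
		snap.pcnt += old[cpu].pcnt;
		snap.bcnt += old[cpu].bcnt;
	}

	/* 3. fold the snapshot back onto cpu 0 of the live table, so no
	 *    packet/byte counts are lost; they just move to one cpu */
	live[0].pcnt += snap.pcnt;
	live[0].bcnt += snap.bcnt;

	printf("snapshot: %lu pkts, %lu bytes\n", snap.pcnt, snap.bcnt);
	return 0;
}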