aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@vyatta.com>2009-04-29 01:36:33 -0400
committerDavid S. Miller <davem@davemloft.net>2009-04-29 01:36:33 -0400
commit942e4a2bd680c606af0211e64eb216be2e19bf61 (patch)
treea83af49242d4a8d53aa0f3b5814eb17da72edc09 /net/ipv6
parentbf368e4e70cd4e0f880923c44e95a4273d725ab4 (diff)
netfilter: revised locking for x_tables
The x_tables are organized with a table structure and a per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow to table processing to proceed without the cache thrashing of a global reader lock and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/netfilter/ip6_tables.c123
1 files changed, 37 insertions, 86 deletions
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 800ae8542471..219e165aea10 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb,
365 365
366 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 366 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
367 367
368 rcu_read_lock_bh(); 368 xt_info_rdlock_bh();
369 private = rcu_dereference(table->private); 369 private = table->private;
370 table_base = rcu_dereference(private->entries[smp_processor_id()]); 370 table_base = private->entries[smp_processor_id()];
371 371
372 e = get_entry(table_base, private->hook_entry[hook]); 372 e = get_entry(table_base, private->hook_entry[hook]);
373 373
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
466#ifdef CONFIG_NETFILTER_DEBUG 466#ifdef CONFIG_NETFILTER_DEBUG
467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; 467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
468#endif 468#endif
469 rcu_read_unlock_bh(); 469 xt_info_rdunlock_bh();
470 470
471#ifdef DEBUG_ALLOW_ALL 471#ifdef DEBUG_ALLOW_ALL
472 return NF_ACCEPT; 472 return NF_ACCEPT;
@@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t,
926 /* Instead of clearing (by a previous call to memset()) 926 /* Instead of clearing (by a previous call to memset())
927 * the counters and using adds, we set the counters 927 * the counters and using adds, we set the counters
928 * with data used by 'current' CPU 928 * with data used by 'current' CPU
929 * We dont care about preemption here. 929 *
930 * Bottom half has to be disabled to prevent deadlock
931 * if new softirq were to run and call ipt_do_table
930 */ 932 */
931 curcpu = raw_smp_processor_id(); 933 local_bh_disable();
934 curcpu = smp_processor_id();
932 935
933 i = 0; 936 i = 0;
934 IP6T_ENTRY_ITERATE(t->entries[curcpu], 937 IP6T_ENTRY_ITERATE(t->entries[curcpu],
@@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t,
941 if (cpu == curcpu) 944 if (cpu == curcpu)
942 continue; 945 continue;
943 i = 0; 946 i = 0;
947 xt_info_wrlock(cpu);
944 IP6T_ENTRY_ITERATE(t->entries[cpu], 948 IP6T_ENTRY_ITERATE(t->entries[cpu],
945 t->size, 949 t->size,
946 add_entry_to_counter, 950 add_entry_to_counter,
947 counters, 951 counters,
948 &i); 952 &i);
953 xt_info_wrunlock(cpu);
949 } 954 }
950}
951
952/* We're lazy, and add to the first CPU; overflow works its fey magic
953 * and everything is OK. */
954static int
955add_counter_to_entry(struct ip6t_entry *e,
956 const struct xt_counters addme[],
957 unsigned int *i)
958{
959 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
960
961 (*i)++;
962 return 0;
963}
964
965/* Take values from counters and add them back onto the current cpu */
966static void put_counters(struct xt_table_info *t,
967 const struct xt_counters counters[])
968{
969 unsigned int i, cpu;
970
971 local_bh_disable();
972 cpu = smp_processor_id();
973 i = 0;
974 IP6T_ENTRY_ITERATE(t->entries[cpu],
975 t->size,
976 add_counter_to_entry,
977 counters,
978 &i);
979 local_bh_enable(); 955 local_bh_enable();
980} 956}
981 957
982static inline int
983zero_entry_counter(struct ip6t_entry *e, void *arg)
984{
985 e->counters.bcnt = 0;
986 e->counters.pcnt = 0;
987 return 0;
988}
989
990static void
991clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
992{
993 unsigned int cpu;
994 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
995
996 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
997 for_each_possible_cpu(cpu) {
998 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
999 IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
1000 zero_entry_counter, NULL);
1001 }
1002}
1003
1004static struct xt_counters *alloc_counters(struct xt_table *table) 958static struct xt_counters *alloc_counters(struct xt_table *table)
1005{ 959{
1006 unsigned int countersize; 960 unsigned int countersize;
1007 struct xt_counters *counters; 961 struct xt_counters *counters;
1008 struct xt_table_info *private = table->private; 962 struct xt_table_info *private = table->private;
1009 struct xt_table_info *info;
1010 963
1011 /* We need atomic snapshot of counters: rest doesn't change 964 /* We need atomic snapshot of counters: rest doesn't change
1012 (other than comefrom, which userspace doesn't care 965 (other than comefrom, which userspace doesn't care
@@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
1015 counters = vmalloc_node(countersize, numa_node_id()); 968 counters = vmalloc_node(countersize, numa_node_id());
1016 969
1017 if (counters == NULL) 970 if (counters == NULL)
1018 goto nomem; 971 return ERR_PTR(-ENOMEM);
1019 972
1020 info = xt_alloc_table_info(private->size); 973 get_counters(private, counters);
1021 if (!info)
1022 goto free_counters;
1023
1024 clone_counters(info, private);
1025
1026 mutex_lock(&table->lock);
1027 xt_table_entry_swap_rcu(private, info);
1028 synchronize_net(); /* Wait until smoke has cleared */
1029
1030 get_counters(info, counters);
1031 put_counters(private, counters);
1032 mutex_unlock(&table->lock);
1033
1034 xt_free_table_info(info);
1035 974
1036 return counters; 975 return counters;
1037
1038 free_counters:
1039 vfree(counters);
1040 nomem:
1041 return ERR_PTR(-ENOMEM);
1042} 976}
1043 977
1044static int 978static int
@@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1334 (newinfo->number <= oldinfo->initial_entries)) 1268 (newinfo->number <= oldinfo->initial_entries))
1335 module_put(t->me); 1269 module_put(t->me);
1336 1270
1337 /* Get the old counters. */ 1271 /* Get the old counters, and synchronize with replace */
1338 get_counters(oldinfo, counters); 1272 get_counters(oldinfo, counters);
1273
1339 /* Decrease module usage counts and free resource */ 1274 /* Decrease module usage counts and free resource */
1340 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1275 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1341 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, 1276 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1405 return ret; 1340 return ret;
1406} 1341}
1407 1342
1343/* We're lazy, and add to the first CPU; overflow works its fey magic
1344 * and everything is OK. */
1345static int
1346add_counter_to_entry(struct ip6t_entry *e,
1347 const struct xt_counters addme[],
1348 unsigned int *i)
1349{
1350 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1351
1352 (*i)++;
1353 return 0;
1354}
1355
1408static int 1356static int
1409do_add_counters(struct net *net, void __user *user, unsigned int len, 1357do_add_counters(struct net *net, void __user *user, unsigned int len,
1410 int compat) 1358 int compat)
1411{ 1359{
1412 unsigned int i; 1360 unsigned int i, curcpu;
1413 struct xt_counters_info tmp; 1361 struct xt_counters_info tmp;
1414 struct xt_counters *paddc; 1362 struct xt_counters *paddc;
1415 unsigned int num_counters; 1363 unsigned int num_counters;
@@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
1465 goto free; 1413 goto free;
1466 } 1414 }
1467 1415
1468 mutex_lock(&t->lock); 1416
1417 local_bh_disable();
1469 private = t->private; 1418 private = t->private;
1470 if (private->number != num_counters) { 1419 if (private->number != num_counters) {
1471 ret = -EINVAL; 1420 ret = -EINVAL;
1472 goto unlock_up_free; 1421 goto unlock_up_free;
1473 } 1422 }
1474 1423
1475 preempt_disable();
1476 i = 0; 1424 i = 0;
1477 /* Choose the copy that is on our node */ 1425 /* Choose the copy that is on our node */
1478 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1426 curcpu = smp_processor_id();
1427 xt_info_wrlock(curcpu);
1428 loc_cpu_entry = private->entries[curcpu];
1479 IP6T_ENTRY_ITERATE(loc_cpu_entry, 1429 IP6T_ENTRY_ITERATE(loc_cpu_entry,
1480 private->size, 1430 private->size,
1481 add_counter_to_entry, 1431 add_counter_to_entry,
1482 paddc, 1432 paddc,
1483 &i); 1433 &i);
1484 preempt_enable(); 1434 xt_info_wrunlock(curcpu);
1435
1485 unlock_up_free: 1436 unlock_up_free:
1486 mutex_unlock(&t->lock); 1437 local_bh_enable();
1487 xt_table_unlock(t); 1438 xt_table_unlock(t);
1488 module_put(t->me); 1439 module_put(t->me);
1489 free: 1440 free: