author    Stephen Hemminger <shemminger@vyatta.com>  2009-02-20 04:35:32 -0500
committer Patrick McHardy <kaber@trash.net>          2009-02-20 04:35:32 -0500
commit    784544739a25c30637397ace5489eeb6e15d7d49
tree      c48bbf30f3eb753858de9a03b74e81925cf39018
parent    323dbf96382f057d035afce0237f08e18571ac1d
netfilter: iptables: lock free counters
The reader/writer lock in ip_tables is acquired in the critical path of
processing packets and is one of the reasons just loading iptables can cause
a 20% performance loss. The rwlock serves two functions:

1) It prevents changes to table state (xt_replace) while the table is in use.
   This is now handled by doing RCU on the xt_table: when a table is
   replaced, the new table(s) are put in place and the old table(s) are
   freed after an RCU grace period.

2) It provides synchronization when accessing the counter values. This is
   now handled by swapping in new table_info entries for each cpu, then
   summing the old values and putting the result back onto one cpu. On a
   busy system this may cause sampling to occur at different times on each
   cpu, but no packet/byte counts are lost in the process.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Successfully tested on my dual quad core machine too, but with iptables only
(no ipv6 here). BTW, my new "tbench 8" result is 2450 MB/s (it was 2150 MB/s
not so long ago).

Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
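In outline, the new counter-read path works as follows (a condensed sketch
assembled from the alloc_counters() hunks below; the name snapshot_counters
is illustrative only, while clone_counters(), get_counters(), put_counters()
and xt_table_entry_swap_rcu() are the helpers this patch adds or reuses):

	static struct xt_counters *snapshot_counters(struct xt_table *table)
	{
		struct xt_table_info *private = table->private;
		struct xt_table_info *info;
		struct xt_counters *counters;

		counters = vmalloc_node(private->number * sizeof(struct xt_counters),
					numa_node_id());
		if (counters == NULL)
			return ERR_PTR(-ENOMEM);

		/* Scratch copy of the ruleset with every counter zeroed */
		info = xt_alloc_table_info(private->size);
		if (!info) {
			vfree(counters);
			return ERR_PTR(-ENOMEM);
		}
		clone_counters(info, private);

		mutex_lock(&table->lock);
		/* Swap the per-cpu entry pointers so that new packets count
		 * into the zeroed copy, then wait until no cpu is still
		 * walking the old entries under rcu_read_lock(). */
		xt_table_entry_swap_rcu(private, info);
		synchronize_net();

		get_counters(info, counters);	 /* sum the now-quiescent values */
		put_counters(private, counters); /* fold them back onto one cpu */
		mutex_unlock(&table->lock);

		xt_free_table_info(info);
		return counters;
	}

The packet path now pays only for rcu_read_lock()/rcu_read_unlock(); the
mutex and the synchronize_net() grace period are paid by the much rarer
counter readers and table replacements instead.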
 include/linux/netfilter/x_tables.h |   6
 net/ipv4/netfilter/arp_tables.c    | 115
 net/ipv4/netfilter/ip_tables.c     | 120
 net/ipv6/netfilter/ip6_tables.c    | 119
 net/netfilter/x_tables.c           |  26
 5 files changed, 284 insertions(+), 102 deletions(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9fac88fc0e72..e8e08d036752 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -353,7 +353,7 @@ struct xt_table
 	unsigned int valid_hooks;
 
 	/* Lock for the curtain */
-	rwlock_t lock;
+	struct mutex lock;
 
 	/* Man behind the curtain... */
 	struct xt_table_info *private;
@@ -385,7 +385,7 @@ struct xt_table_info
 
 	/* ipt_entry tables: one per CPU */
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-	char *entries[1];
+	void *entries[1];
 };
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
@@ -432,6 +432,8 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
+extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
+				    struct xt_table_info *new);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b5db46342614..64a7c6ce0b98 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -261,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	read_lock_bh(&table->lock);
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
@@ -335,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-	read_unlock_bh(&table->lock);
+
+	rcu_read_unlock();
 
 	if (hotdrop)
 		return NF_DROP;
@@ -738,11 +740,65 @@ static void get_counters(const struct xt_table_info *t,
 	}
 }
 
-static inline struct xt_counters *alloc_counters(struct xt_table *table)
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[cpu],
+			   t->size,
+			   add_counter_to_entry,
+			   counters,
+			   &i);
+	local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct arpt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				   zero_entry_counter, NULL);
+	}
+}
+
+static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -752,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int copy_entries_to_user(unsigned int total_size,
@@ -1099,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK.
- */
-static inline int add_counter_to_entry(struct arpt_entry *e,
-				       const struct xt_counters addme[],
-				       unsigned int *i)
-{
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   int compat)
 {
@@ -1172,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[smp_processor_id()];
@@ -1187,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   add_counter_to_entry,
 			   paddc,
 			   &i);
+	preempt_enable();
 unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
+
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef8b6ca068b2..08cde5bd70a5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb,
 	mtpar.family = tgpar.family = NFPROTO_IPV4;
 	tgpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
@@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb,
 		}
 	} while (!hotdrop);
 
-	read_unlock_bh(&table->lock);
+	rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -924,13 +926,68 @@ get_counters(const struct xt_table_info *t,
 				  counters,
 				  &i);
 	}
+
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[cpu],
+			  t->size,
+			  add_counter_to_entry,
+			  counters,
+			  &i);
+	local_bh_enable();
+}
+
+
+static inline int
+zero_entry_counter(struct ipt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				  zero_entry_counter, NULL);
+	}
 }
 
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -939,14 +996,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1312,27 +1385,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
 
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
@@ -1393,13 +1445,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1408,8 +1461,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
+	preempt_enable();
 unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d64594b6c061..34af7bb8df5f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -382,10 +382,12 @@ ip6t_do_table(struct sk_buff *skb,
 	mtpar.family = tgpar.family = NFPROTO_IPV6;
 	tgpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
@@ -483,7 +485,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-	read_unlock_bh(&table->lock);
+	rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -964,11 +966,64 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->entries[cpu],
+			   t->size,
+			   add_counter_to_entry,
+			   counters,
+			   &i);
+	local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct ip6t_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				   zero_entry_counter, NULL);
+	}
+}
+
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -977,14 +1032,30 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1351,28 +1420,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
 		int compat)
@@ -1433,13 +1480,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1448,8 +1496,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   add_counter_to_entry,
 			   paddc,
 			   &i);
+	preempt_enable();
 unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bfbf521f6ea5..bfcac92d5563 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,6 +625,20 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
+void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
+			     struct xt_table_info *newinfo)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *p = oldinfo->entries[cpu];
+		rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
+		newinfo->entries[cpu] = p;
+	}
+
+}
+EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
+
 /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
@@ -671,21 +685,22 @@ xt_replace_table(struct xt_table *table,
 	struct xt_table_info *oldinfo, *private;
 
 	/* Do the substitution. */
-	write_lock_bh(&table->lock);
+	mutex_lock(&table->lock);
 	private = table->private;
 	/* Check inside lock: is the old number correct? */
 	if (num_counters != private->number) {
 		duprintf("num_counters != table->private->number (%u/%u)\n",
 			 num_counters, private->number);
-		write_unlock_bh(&table->lock);
+		mutex_unlock(&table->lock);
 		*error = -EAGAIN;
 		return NULL;
 	}
 	oldinfo = private;
-	table->private = newinfo;
+	rcu_assign_pointer(table->private, newinfo);
 	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	mutex_unlock(&table->lock);
 
+	synchronize_net();
 	return oldinfo;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -719,7 +734,8 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
-	rwlock_init(&table->lock);
+	mutex_init(&table->lock);
+
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
 