aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2011-01-10 14:11:38 -0500
committer Pablo Neira Ayuso <pablo@netfilter.org> 2011-01-10 14:11:38 -0500
commit 83723d60717f8da0f53f91cf42a845ed56c09662 (patch)
tree 9d32edc2c6dc4849e63d422f8dad42606b2f984f /net
parent 45b9f509b7f5d2d792b3c03b78ddc8ec543e921b (diff)
netfilter: x_tables: don't block BH while reading counters
Using "iptables -L" with a lot of rules causes too high a BH latency. Jesper mentioned ~6 ms and was worried about frame drops. Switch to a per-CPU seqlock scheme, so that taking a snapshot of the counters doesn't need to block BH (on this CPU, but also on other CPUs). This adds two increments of the seqlock sequence per ipt_do_table() call, which is a reasonable cost for letting "iptables -L" avoid blocking BH processing.

Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 45
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 45
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 45
-rw-r--r-- net/netfilter/x_tables.c 3
4 files changed, 44 insertions, 94 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3fac340a28d5..e855fffaed95 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
710 struct arpt_entry *iter; 710 struct arpt_entry *iter;
711 unsigned int cpu; 711 unsigned int cpu;
712 unsigned int i; 712 unsigned int i;
713 unsigned int curcpu = get_cpu();
714
715 /* Instead of clearing (by a previous call to memset())
716 * the counters and using adds, we set the counters
717 * with data used by 'current' CPU
718 *
719 * Bottom half has to be disabled to prevent deadlock
720 * if new softirq were to run and call ipt_do_table
721 */
722 local_bh_disable();
723 i = 0;
724 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
725 SET_COUNTER(counters[i], iter->counters.bcnt,
726 iter->counters.pcnt);
727 ++i;
728 }
729 local_bh_enable();
730 /* Processing counters from other cpus, we can let bottom half enabled,
731 * (preemption is disabled)
732 */
733 713
734 for_each_possible_cpu(cpu) { 714 for_each_possible_cpu(cpu) {
735 if (cpu == curcpu) 715 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
736 continue; 716
737 i = 0; 717 i = 0;
738 local_bh_disable();
739 xt_info_wrlock(cpu);
740 xt_entry_foreach(iter, t->entries[cpu], t->size) { 718 xt_entry_foreach(iter, t->entries[cpu], t->size) {
741 ADD_COUNTER(counters[i], iter->counters.bcnt, 719 u64 bcnt, pcnt;
742 iter->counters.pcnt); 720 unsigned int start;
721
722 do {
723 start = read_seqbegin(lock);
724 bcnt = iter->counters.bcnt;
725 pcnt = iter->counters.pcnt;
726 } while (read_seqretry(lock, start));
727
728 ADD_COUNTER(counters[i], bcnt, pcnt);
743 ++i; 729 ++i;
744 } 730 }
745 xt_info_wrunlock(cpu);
746 local_bh_enable();
747 } 731 }
748 put_cpu();
749} 732}
750 733
751static struct xt_counters *alloc_counters(const struct xt_table *table) 734static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
759 * about). 742 * about).
760 */ 743 */
761 countersize = sizeof(struct xt_counters) * private->number; 744 countersize = sizeof(struct xt_counters) * private->number;
762 counters = vmalloc(countersize); 745 counters = vzalloc(countersize);
763 746
764 if (counters == NULL) 747 if (counters == NULL)
765 return ERR_PTR(-ENOMEM); 748 return ERR_PTR(-ENOMEM);
@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
1007 struct arpt_entry *iter; 990 struct arpt_entry *iter;
1008 991
1009 ret = 0; 992 ret = 0;
1010 counters = vmalloc(num_counters * sizeof(struct xt_counters)); 993 counters = vzalloc(num_counters * sizeof(struct xt_counters));
1011 if (!counters) { 994 if (!counters) {
1012 ret = -ENOMEM; 995 ret = -ENOMEM;
1013 goto out; 996 goto out;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d633b3b6..652efea013dc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
884 struct ipt_entry *iter; 884 struct ipt_entry *iter;
885 unsigned int cpu; 885 unsigned int cpu;
886 unsigned int i; 886 unsigned int i;
887 unsigned int curcpu = get_cpu();
888
889 /* Instead of clearing (by a previous call to memset())
890 * the counters and using adds, we set the counters
891 * with data used by 'current' CPU.
892 *
893 * Bottom half has to be disabled to prevent deadlock
894 * if new softirq were to run and call ipt_do_table
895 */
896 local_bh_disable();
897 i = 0;
898 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
899 SET_COUNTER(counters[i], iter->counters.bcnt,
900 iter->counters.pcnt);
901 ++i;
902 }
903 local_bh_enable();
904 /* Processing counters from other cpus, we can let bottom half enabled,
905 * (preemption is disabled)
906 */
907 887
908 for_each_possible_cpu(cpu) { 888 for_each_possible_cpu(cpu) {
909 if (cpu == curcpu) 889 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
910 continue; 890
911 i = 0; 891 i = 0;
912 local_bh_disable();
913 xt_info_wrlock(cpu);
914 xt_entry_foreach(iter, t->entries[cpu], t->size) { 892 xt_entry_foreach(iter, t->entries[cpu], t->size) {
915 ADD_COUNTER(counters[i], iter->counters.bcnt, 893 u64 bcnt, pcnt;
916 iter->counters.pcnt); 894 unsigned int start;
895
896 do {
897 start = read_seqbegin(lock);
898 bcnt = iter->counters.bcnt;
899 pcnt = iter->counters.pcnt;
900 } while (read_seqretry(lock, start));
901
902 ADD_COUNTER(counters[i], bcnt, pcnt);
917 ++i; /* macro does multi eval of i */ 903 ++i; /* macro does multi eval of i */
918 } 904 }
919 xt_info_wrunlock(cpu);
920 local_bh_enable();
921 } 905 }
922 put_cpu();
923} 906}
924 907
925static struct xt_counters *alloc_counters(const struct xt_table *table) 908static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
932 (other than comefrom, which userspace doesn't care 915 (other than comefrom, which userspace doesn't care
933 about). */ 916 about). */
934 countersize = sizeof(struct xt_counters) * private->number; 917 countersize = sizeof(struct xt_counters) * private->number;
935 counters = vmalloc(countersize); 918 counters = vzalloc(countersize);
936 919
937 if (counters == NULL) 920 if (counters == NULL)
938 return ERR_PTR(-ENOMEM); 921 return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1203 struct ipt_entry *iter; 1186 struct ipt_entry *iter;
1204 1187
1205 ret = 0; 1188 ret = 0;
1206 counters = vmalloc(num_counters * sizeof(struct xt_counters)); 1189 counters = vzalloc(num_counters * sizeof(struct xt_counters));
1207 if (!counters) { 1190 if (!counters) {
1208 ret = -ENOMEM; 1191 ret = -ENOMEM;
1209 goto out; 1192 goto out;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ece..7d227c644f72 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
897 struct ip6t_entry *iter; 897 struct ip6t_entry *iter;
898 unsigned int cpu; 898 unsigned int cpu;
899 unsigned int i; 899 unsigned int i;
900 unsigned int curcpu = get_cpu();
901
902 /* Instead of clearing (by a previous call to memset())
903 * the counters and using adds, we set the counters
904 * with data used by 'current' CPU
905 *
906 * Bottom half has to be disabled to prevent deadlock
907 * if new softirq were to run and call ipt_do_table
908 */
909 local_bh_disable();
910 i = 0;
911 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
912 SET_COUNTER(counters[i], iter->counters.bcnt,
913 iter->counters.pcnt);
914 ++i;
915 }
916 local_bh_enable();
917 /* Processing counters from other cpus, we can let bottom half enabled,
918 * (preemption is disabled)
919 */
920 900
921 for_each_possible_cpu(cpu) { 901 for_each_possible_cpu(cpu) {
922 if (cpu == curcpu) 902 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
923 continue; 903
924 i = 0; 904 i = 0;
925 local_bh_disable();
926 xt_info_wrlock(cpu);
927 xt_entry_foreach(iter, t->entries[cpu], t->size) { 905 xt_entry_foreach(iter, t->entries[cpu], t->size) {
928 ADD_COUNTER(counters[i], iter->counters.bcnt, 906 u64 bcnt, pcnt;
929 iter->counters.pcnt); 907 unsigned int start;
908
909 do {
910 start = read_seqbegin(lock);
911 bcnt = iter->counters.bcnt;
912 pcnt = iter->counters.pcnt;
913 } while (read_seqretry(lock, start));
914
915 ADD_COUNTER(counters[i], bcnt, pcnt);
930 ++i; 916 ++i;
931 } 917 }
932 xt_info_wrunlock(cpu);
933 local_bh_enable();
934 } 918 }
935 put_cpu();
936} 919}
937 920
938static struct xt_counters *alloc_counters(const struct xt_table *table) 921static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
945 (other than comefrom, which userspace doesn't care 928 (other than comefrom, which userspace doesn't care
946 about). */ 929 about). */
947 countersize = sizeof(struct xt_counters) * private->number; 930 countersize = sizeof(struct xt_counters) * private->number;
948 counters = vmalloc(countersize); 931 counters = vzalloc(countersize);
949 932
950 if (counters == NULL) 933 if (counters == NULL)
951 return ERR_PTR(-ENOMEM); 934 return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1216 struct ip6t_entry *iter; 1199 struct ip6t_entry *iter;
1217 1200
1218 ret = 0; 1201 ret = 0;
1219 counters = vmalloc(num_counters * sizeof(struct xt_counters)); 1202 counters = vzalloc(num_counters * sizeof(struct xt_counters));
1220 if (!counters) { 1203 if (!counters) {
1221 ret = -ENOMEM; 1204 ret = -ENOMEM;
1222 goto out; 1205 goto out;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 80463507420e..c94237631077 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
1325 1325
1326 for_each_possible_cpu(i) { 1326 for_each_possible_cpu(i) {
1327 struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); 1327 struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
1328 spin_lock_init(&lock->lock); 1328
1329 seqlock_init(&lock->lock);
1329 lock->readers = 0; 1330 lock->readers = 0;
1330 } 1331 }
1331 1332