author		Eric Dumazet <eric.dumazet@gmail.com>	2011-01-10 14:11:38 -0500
committer	Pablo Neira Ayuso <pablo@netfilter.org>	2011-01-10 14:11:38 -0500
commit		83723d60717f8da0f53f91cf42a845ed56c09662 (patch)
tree		9d32edc2c6dc4849e63d422f8dad42606b2f984f /net
parent		45b9f509b7f5d2d792b3c03b78ddc8ec543e921b (diff)
netfilter: x_tables: don't block BH while reading counters
Using "iptables -L" with a lot of rules have a too big BH latency.
Jesper mentioned ~6 ms and worried of frame drops.
Switch to a per_cpu seqlock scheme, so that taking a snapshot of
counters doesnt need to block BH (for this cpu, but also other cpus).
This adds two increments on seqlock sequence per ipt_do_table() call,
its a reasonable cost for allowing "iptables -L" not block BH
processing.
Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
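
For readers unfamiliar with the pattern, the sketch below illustrates the per-cpu seqlock scheme described above. It is a simplified, hypothetical example rather than the patched kernel code: the demo_* names and counter struct are invented for illustration; only the seqlock and per-cpu APIs (write_seqlock/write_sequnlock, read_seqbegin/read_seqretry, DEFINE_PER_CPU) are real; and the real x_tables code additionally keeps a per-cpu nesting counter (lock->readers, visible in the x_tables.c hunk) so re-entrant table traversal on the same cpu does not take the write side twice.

/*
 * Minimal sketch of the per-cpu seqlock counter scheme (illustrative only;
 * demo_* names are invented, the real code lives in x_tables/ipt_do_table).
 */
#include <linux/types.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>

struct demo_counter {
	u64 bcnt;	/* byte count */
	u64 pcnt;	/* packet count */
};

static DEFINE_PER_CPU(seqlock_t, demo_lock);		/* seqlock_init() at boot */
static DEFINE_PER_CPU(struct demo_counter, demo_cnt);

static int __init demo_init(void)
{
	int cpu;

	/* Same idea as the xt_init() change in this patch. */
	for_each_possible_cpu(cpu)
		seqlock_init(&per_cpu(demo_lock, cpu));
	return 0;
}

/* Hot path (softirq, BH already disabled): bump this cpu's counters.
 * write_seqlock()/write_sequnlock() are the two sequence increments
 * mentioned in the changelog. */
static void demo_update(unsigned int bytes)
{
	seqlock_t *lock = this_cpu_ptr(&demo_lock);
	struct demo_counter *c = this_cpu_ptr(&demo_cnt);

	write_seqlock(lock);
	c->bcnt += bytes;
	c->pcnt++;
	write_sequnlock(lock);
}

/* Cold path ("iptables -L"): snapshot every cpu without disabling BH. */
static void demo_snapshot(u64 *bytes, u64 *pkts)
{
	unsigned int cpu;

	*bytes = *pkts = 0;
	for_each_possible_cpu(cpu) {
		seqlock_t *lock = &per_cpu(demo_lock, cpu);
		struct demo_counter *c = &per_cpu(demo_cnt, cpu);
		unsigned int start;
		u64 bcnt, pcnt;

		do {	/* retry if a writer on that cpu raced with us */
			start = read_seqbegin(lock);
			bcnt = c->bcnt;
			pcnt = c->pcnt;
		} while (read_seqretry(lock, start));

		*bytes += bcnt;
		*pkts += pcnt;
	}
}

The hot path performs exactly the two sequence increments noted above, while the snapshot path merely retries a lock-free read when it races with a writer, so it never needs to disable BH. The diff below applies this reader pattern to get_counters() in arp_tables, ip_tables and ip6_tables.
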
Diffstat (limited to 'net')
-rw-r--r--	net/ipv4/netfilter/arp_tables.c	45
-rw-r--r--	net/ipv4/netfilter/ip_tables.c	45
-rw-r--r--	net/ipv6/netfilter/ip6_tables.c	45
-rw-r--r--	net/netfilter/x_tables.c	3
4 files changed, 44 insertions, 94 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3fac340a28d5..e855fffaed95 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
 	struct arpt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	 * about).
 	 */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
 	struct arpt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d633b3b6..652efea013dc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
 	struct ipt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU.
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ipt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ece..7d227c644f72 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
 	struct ip6t_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ip6t_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 80463507420e..c94237631077 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
 
 	for_each_possible_cpu(i) {
 		struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-		spin_lock_init(&lock->lock);
+
+		seqlock_init(&lock->lock);
 		lock->readers = 0;
 	}
 