| author | Stephen Hemminger <shemminger@vyatta.com> | 2009-04-29 01:36:33 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2009-04-29 01:36:33 -0400 |
| commit | 942e4a2bd680c606af0211e64eb216be2e19bf61 (patch) | |
| tree | a83af49242d4a8d53aa0f3b5814eb17da72edc09 | |
| parent | bf368e4e70cd4e0f880923c44e95a4273d725ab4 (diff) | |
netfilter: revised locking for x_tables
The x_tables are organized with a table structure and per-cpu copies of
the counters and rules. On older kernels there was a reader/writer lock
per table, which was a performance bottleneck. In 2.6.30-rc this was
converted to use RCU for the counters/rules, which solved the performance
problems for do_table but made replacing rules much slower because of the
necessary RCU grace period.
This version uses a per-cpu set of spinlocks and counters to allow
table processing to proceed without the cache thrashing of a global
reader lock, and keeps the same performance for table updates.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/linux/netfilter/x_tables.h | 73 |
| -rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 125 |
| -rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 126 |
| -rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 123 |
| -rw-r--r-- | net/netfilter/x_tables.c | 53 |
5 files changed, 204 insertions, 296 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 7b1a652066c0..1b2e43502ef7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
| @@ -354,9 +354,6 @@ struct xt_table | |||
| 354 | /* What hooks you will enter on */ | 354 | /* What hooks you will enter on */ |
| 355 | unsigned int valid_hooks; | 355 | unsigned int valid_hooks; |
| 356 | 356 | ||
| 357 | /* Lock for the curtain */ | ||
| 358 | struct mutex lock; | ||
| 359 | |||
| 360 | /* Man behind the curtain... */ | 357 | /* Man behind the curtain... */ |
| 361 | struct xt_table_info *private; | 358 | struct xt_table_info *private; |
| 362 | 359 | ||
| @@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); | |||
| 434 | 431 | ||
| 435 | extern struct xt_table_info *xt_alloc_table_info(unsigned int size); | 432 | extern struct xt_table_info *xt_alloc_table_info(unsigned int size); |
| 436 | extern void xt_free_table_info(struct xt_table_info *info); | 433 | extern void xt_free_table_info(struct xt_table_info *info); |
| 437 | extern void xt_table_entry_swap_rcu(struct xt_table_info *old, | 434 | |
| 438 | struct xt_table_info *new); | 435 | /* |
| 436 | * Per-CPU spinlock associated with per-cpu table entries, and | ||
| 437 | * with a counter for the "reading" side that allows a recursive | ||
| 438 | * reader to avoid taking the lock and deadlocking. | ||
| 439 | * | ||
| 440 | * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. | ||
| 441 | * It needs to ensure that the rules are not being changed while the packet | ||
| 442 | * is being processed. In some cases, the read lock will be acquired | ||
| 443 | * twice on the same CPU; this is okay because of the count. | ||
| 444 | * | ||
| 445 | * "writing" is used when reading counters. | ||
| 446 | * During replace any readers that are using the old tables have to complete | ||
| 447 | * before freeing the old table. This is handled by the write locking | ||
| 448 | * necessary for reading the counters. | ||
| 449 | */ | ||
| 450 | struct xt_info_lock { | ||
| 451 | spinlock_t lock; | ||
| 452 | unsigned char readers; | ||
| 453 | }; | ||
| 454 | DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); | ||
| 455 | |||
| 456 | /* | ||
| 457 | * Note: we need to ensure that preemption is disabled before acquiring | ||
| 458 | * the per-cpu-variable, so we do it as a two step process rather than | ||
| 459 | * using "spin_lock_bh()". | ||
| 460 | * | ||
| 461 | * We _also_ need to disable bottom half processing before updating our | ||
| 462 | * nesting count, to make sure that the only kind of re-entrancy is this | ||
| 463 | * code being called by itself: since the count+lock is not an atomic | ||
| 464 | * operation, we can allow no races. | ||
| 465 | * | ||
| 466 | * _Only_ that special combination of being per-cpu and never getting | ||
| 467 | * re-entered asynchronously means that the count is safe. | ||
| 468 | */ | ||
| 469 | static inline void xt_info_rdlock_bh(void) | ||
| 470 | { | ||
| 471 | struct xt_info_lock *lock; | ||
| 472 | |||
| 473 | local_bh_disable(); | ||
| 474 | lock = &__get_cpu_var(xt_info_locks); | ||
| 475 | if (!lock->readers++) | ||
| 476 | spin_lock(&lock->lock); | ||
| 477 | } | ||
| 478 | |||
| 479 | static inline void xt_info_rdunlock_bh(void) | ||
| 480 | { | ||
| 481 | struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); | ||
| 482 | |||
| 483 | if (!--lock->readers) | ||
| 484 | spin_unlock(&lock->lock); | ||
| 485 | local_bh_enable(); | ||
| 486 | } | ||
| 487 | |||
| 488 | /* | ||
| 489 | * The "writer" side needs to get exclusive access to the lock, | ||
| 490 | * regardless of readers. This must be called with bottom half | ||
| 491 | * processing (and thus also preemption) disabled. | ||
| 492 | */ | ||
| 493 | static inline void xt_info_wrlock(unsigned int cpu) | ||
| 494 | { | ||
| 495 | spin_lock(&per_cpu(xt_info_locks, cpu).lock); | ||
| 496 | } | ||
| 497 | |||
| 498 | static inline void xt_info_wrunlock(unsigned int cpu) | ||
| 499 | { | ||
| 500 | spin_unlock(&per_cpu(xt_info_locks, cpu).lock); | ||
| 501 | } | ||
| 439 | 502 | ||
| 440 | /* | 503 | /* |
| 441 | * This helper is performance critical and must be inlined | 504 | * This helper is performance critical and must be inlined |
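The recursion counter in xt_info_lock matters because rule traversal can
re-enter on the same CPU, for example when a target generates a reply packet
that itself traverses the tables. The toy userspace model below (not kernel
code; the names and the single-threaded setting are illustrative only) shows
why nested xt_info_rdlock_bh()/xt_info_rdunlock_bh() calls cannot
self-deadlock: only the outermost level touches the spinlock.

```c
#include <stdio.h>

/* Userspace stand-in for one CPU's xt_info_lock; 'held' models the spinlock. */
struct model_lock {
	int held;
	unsigned char readers;
};

static struct model_lock cpu0;

static void model_rdlock(void)
{
	if (!cpu0.readers++)
		cpu0.held = 1;		/* would be spin_lock(&lock->lock) */
}

static void model_rdunlock(void)
{
	if (!--cpu0.readers)
		cpu0.held = 0;		/* would be spin_unlock(&lock->lock) */
}

int main(void)
{
	model_rdlock();			/* outer packet traversal */
	model_rdlock();			/* nested traversal triggered by a target */
	printf("nested: readers=%u held=%d\n", (unsigned)cpu0.readers, cpu0.held);
	model_rdunlock();
	model_rdunlock();
	printf("done:   readers=%u held=%d\n", (unsigned)cpu0.readers, cpu0.held);
	return 0;
}
```

With a plain per-cpu spinlock and no counter, the nested acquisition would
spin on a lock its own CPU already holds.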
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5ba533d234db..831fe1879dc0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
| @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 253 | indev = in ? in->name : nulldevname; | 253 | indev = in ? in->name : nulldevname; |
| 254 | outdev = out ? out->name : nulldevname; | 254 | outdev = out ? out->name : nulldevname; |
| 255 | 255 | ||
| 256 | rcu_read_lock_bh(); | 256 | xt_info_rdlock_bh(); |
| 257 | private = rcu_dereference(table->private); | 257 | private = table->private; |
| 258 | table_base = rcu_dereference(private->entries[smp_processor_id()]); | 258 | table_base = private->entries[smp_processor_id()]; |
| 259 | 259 | ||
| 260 | e = get_entry(table_base, private->hook_entry[hook]); | 260 | e = get_entry(table_base, private->hook_entry[hook]); |
| 261 | back = get_entry(table_base, private->underflow[hook]); | 261 | back = get_entry(table_base, private->underflow[hook]); |
| @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 273 | 273 | ||
| 274 | hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + | 274 | hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + |
| 275 | (2 * skb->dev->addr_len); | 275 | (2 * skb->dev->addr_len); |
| 276 | |||
| 276 | ADD_COUNTER(e->counters, hdr_len, 1); | 277 | ADD_COUNTER(e->counters, hdr_len, 1); |
| 277 | 278 | ||
| 278 | t = arpt_get_target(e); | 279 | t = arpt_get_target(e); |
| @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 328 | e = (void *)e + e->next_offset; | 329 | e = (void *)e + e->next_offset; |
| 329 | } | 330 | } |
| 330 | } while (!hotdrop); | 331 | } while (!hotdrop); |
| 331 | 332 | xt_info_rdunlock_bh(); | |
| 332 | rcu_read_unlock_bh(); | ||
| 333 | 333 | ||
| 334 | if (hotdrop) | 334 | if (hotdrop) |
| 335 | return NF_DROP; | 335 | return NF_DROP; |
| @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, | |||
| 711 | /* Instead of clearing (by a previous call to memset()) | 711 | /* Instead of clearing (by a previous call to memset()) |
| 712 | * the counters and using adds, we set the counters | 712 | * the counters and using adds, we set the counters |
| 713 | * with data used by 'current' CPU | 713 | * with data used by 'current' CPU |
| 714 | * We dont care about preemption here. | 714 | * |
| 715 | * Bottom half has to be disabled to prevent deadlock | ||
| 716 | * if new softirq were to run and call ipt_do_table | ||
| 715 | */ | 717 | */ |
| 716 | curcpu = raw_smp_processor_id(); | 718 | local_bh_disable(); |
| 719 | curcpu = smp_processor_id(); | ||
| 717 | 720 | ||
| 718 | i = 0; | 721 | i = 0; |
| 719 | ARPT_ENTRY_ITERATE(t->entries[curcpu], | 722 | ARPT_ENTRY_ITERATE(t->entries[curcpu], |
| @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, | |||
| 726 | if (cpu == curcpu) | 729 | if (cpu == curcpu) |
| 727 | continue; | 730 | continue; |
| 728 | i = 0; | 731 | i = 0; |
| 732 | xt_info_wrlock(cpu); | ||
| 729 | ARPT_ENTRY_ITERATE(t->entries[cpu], | 733 | ARPT_ENTRY_ITERATE(t->entries[cpu], |
| 730 | t->size, | 734 | t->size, |
| 731 | add_entry_to_counter, | 735 | add_entry_to_counter, |
| 732 | counters, | 736 | counters, |
| 733 | &i); | 737 | &i); |
| 738 | xt_info_wrunlock(cpu); | ||
| 734 | } | 739 | } |
| 735 | } | ||
| 736 | |||
| 737 | |||
| 738 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 739 | * and everything is OK. */ | ||
| 740 | static int | ||
| 741 | add_counter_to_entry(struct arpt_entry *e, | ||
| 742 | const struct xt_counters addme[], | ||
| 743 | unsigned int *i) | ||
| 744 | { | ||
| 745 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 746 | |||
| 747 | (*i)++; | ||
| 748 | return 0; | ||
| 749 | } | ||
| 750 | |||
| 751 | /* Take values from counters and add them back onto the current cpu */ | ||
| 752 | static void put_counters(struct xt_table_info *t, | ||
| 753 | const struct xt_counters counters[]) | ||
| 754 | { | ||
| 755 | unsigned int i, cpu; | ||
| 756 | |||
| 757 | local_bh_disable(); | ||
| 758 | cpu = smp_processor_id(); | ||
| 759 | i = 0; | ||
| 760 | ARPT_ENTRY_ITERATE(t->entries[cpu], | ||
| 761 | t->size, | ||
| 762 | add_counter_to_entry, | ||
| 763 | counters, | ||
| 764 | &i); | ||
| 765 | local_bh_enable(); | 740 | local_bh_enable(); |
| 766 | } | 741 | } |
| 767 | 742 | ||
| 768 | static inline int | ||
| 769 | zero_entry_counter(struct arpt_entry *e, void *arg) | ||
| 770 | { | ||
| 771 | e->counters.bcnt = 0; | ||
| 772 | e->counters.pcnt = 0; | ||
| 773 | return 0; | ||
| 774 | } | ||
| 775 | |||
| 776 | static void | ||
| 777 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
| 778 | { | ||
| 779 | unsigned int cpu; | ||
| 780 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
| 781 | |||
| 782 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
| 783 | for_each_possible_cpu(cpu) { | ||
| 784 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
| 785 | ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
| 786 | zero_entry_counter, NULL); | ||
| 787 | } | ||
| 788 | } | ||
| 789 | |||
| 790 | static struct xt_counters *alloc_counters(struct xt_table *table) | 743 | static struct xt_counters *alloc_counters(struct xt_table *table) |
| 791 | { | 744 | { |
| 792 | unsigned int countersize; | 745 | unsigned int countersize; |
| 793 | struct xt_counters *counters; | 746 | struct xt_counters *counters; |
| 794 | struct xt_table_info *private = table->private; | 747 | struct xt_table_info *private = table->private; |
| 795 | struct xt_table_info *info; | ||
| 796 | 748 | ||
| 797 | /* We need atomic snapshot of counters: rest doesn't change | 749 | /* We need atomic snapshot of counters: rest doesn't change |
| 798 | * (other than comefrom, which userspace doesn't care | 750 | * (other than comefrom, which userspace doesn't care |
| @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) | |||
| 802 | counters = vmalloc_node(countersize, numa_node_id()); | 754 | counters = vmalloc_node(countersize, numa_node_id()); |
| 803 | 755 | ||
| 804 | if (counters == NULL) | 756 | if (counters == NULL) |
| 805 | goto nomem; | 757 | return ERR_PTR(-ENOMEM); |
| 806 | |||
| 807 | info = xt_alloc_table_info(private->size); | ||
| 808 | if (!info) | ||
| 809 | goto free_counters; | ||
| 810 | |||
| 811 | clone_counters(info, private); | ||
| 812 | |||
| 813 | mutex_lock(&table->lock); | ||
| 814 | xt_table_entry_swap_rcu(private, info); | ||
| 815 | synchronize_net(); /* Wait until smoke has cleared */ | ||
| 816 | 758 | ||
| 817 | get_counters(info, counters); | 759 | get_counters(private, counters); |
| 818 | put_counters(private, counters); | ||
| 819 | mutex_unlock(&table->lock); | ||
| 820 | |||
| 821 | xt_free_table_info(info); | ||
| 822 | 760 | ||
| 823 | return counters; | 761 | return counters; |
| 824 | |||
| 825 | free_counters: | ||
| 826 | vfree(counters); | ||
| 827 | nomem: | ||
| 828 | return ERR_PTR(-ENOMEM); | ||
| 829 | } | 762 | } |
| 830 | 763 | ||
| 831 | static int copy_entries_to_user(unsigned int total_size, | 764 | static int copy_entries_to_user(unsigned int total_size, |
| @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, | |||
| 1094 | (newinfo->number <= oldinfo->initial_entries)) | 1027 | (newinfo->number <= oldinfo->initial_entries)) |
| 1095 | module_put(t->me); | 1028 | module_put(t->me); |
| 1096 | 1029 | ||
| 1097 | /* Get the old counters. */ | 1030 | /* Get the old counters, and synchronize with replace */ |
| 1098 | get_counters(oldinfo, counters); | 1031 | get_counters(oldinfo, counters); |
| 1032 | |||
| 1099 | /* Decrease module usage counts and free resource */ | 1033 | /* Decrease module usage counts and free resource */ |
| 1100 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1034 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
| 1101 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1035 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, |
| @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1165 | return ret; | 1099 | return ret; |
| 1166 | } | 1100 | } |
| 1167 | 1101 | ||
| 1102 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 1103 | * and everything is OK. */ | ||
| 1104 | static int | ||
| 1105 | add_counter_to_entry(struct arpt_entry *e, | ||
| 1106 | const struct xt_counters addme[], | ||
| 1107 | unsigned int *i) | ||
| 1108 | { | ||
| 1109 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 1110 | |||
| 1111 | (*i)++; | ||
| 1112 | return 0; | ||
| 1113 | } | ||
| 1114 | |||
| 1168 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, | 1115 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, |
| 1169 | int compat) | 1116 | int compat) |
| 1170 | { | 1117 | { |
| 1171 | unsigned int i; | 1118 | unsigned int i, curcpu; |
| 1172 | struct xt_counters_info tmp; | 1119 | struct xt_counters_info tmp; |
| 1173 | struct xt_counters *paddc; | 1120 | struct xt_counters *paddc; |
| 1174 | unsigned int num_counters; | 1121 | unsigned int num_counters; |
| @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
| 1224 | goto free; | 1171 | goto free; |
| 1225 | } | 1172 | } |
| 1226 | 1173 | ||
| 1227 | mutex_lock(&t->lock); | 1174 | local_bh_disable(); |
| 1228 | private = t->private; | 1175 | private = t->private; |
| 1229 | if (private->number != num_counters) { | 1176 | if (private->number != num_counters) { |
| 1230 | ret = -EINVAL; | 1177 | ret = -EINVAL; |
| 1231 | goto unlock_up_free; | 1178 | goto unlock_up_free; |
| 1232 | } | 1179 | } |
| 1233 | 1180 | ||
| 1234 | preempt_disable(); | ||
| 1235 | i = 0; | 1181 | i = 0; |
| 1236 | /* Choose the copy that is on our node */ | 1182 | /* Choose the copy that is on our node */ |
| 1237 | loc_cpu_entry = private->entries[smp_processor_id()]; | 1183 | curcpu = smp_processor_id(); |
| 1184 | loc_cpu_entry = private->entries[curcpu]; | ||
| 1185 | xt_info_wrlock(curcpu); | ||
| 1238 | ARPT_ENTRY_ITERATE(loc_cpu_entry, | 1186 | ARPT_ENTRY_ITERATE(loc_cpu_entry, |
| 1239 | private->size, | 1187 | private->size, |
| 1240 | add_counter_to_entry, | 1188 | add_counter_to_entry, |
| 1241 | paddc, | 1189 | paddc, |
| 1242 | &i); | 1190 | &i); |
| 1243 | preempt_enable(); | 1191 | xt_info_wrunlock(curcpu); |
| 1244 | unlock_up_free: | 1192 | unlock_up_free: |
| 1245 | mutex_unlock(&t->lock); | 1193 | local_bh_enable(); |
| 1246 | |||
| 1247 | xt_table_unlock(t); | 1194 | xt_table_unlock(t); |
| 1248 | module_put(t->me); | 1195 | module_put(t->me); |
| 1249 | free: | 1196 | free: |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 810c0b62c7d4..2ec8d7290c40 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
| @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, | |||
| 338 | tgpar.hooknum = hook; | 338 | tgpar.hooknum = hook; |
| 339 | 339 | ||
| 340 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 340 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
| 341 | 341 | xt_info_rdlock_bh(); | |
| 342 | rcu_read_lock_bh(); | 342 | private = table->private; |
| 343 | private = rcu_dereference(table->private); | 343 | table_base = private->entries[smp_processor_id()]; |
| 344 | table_base = rcu_dereference(private->entries[smp_processor_id()]); | ||
| 345 | 344 | ||
| 346 | e = get_entry(table_base, private->hook_entry[hook]); | 345 | e = get_entry(table_base, private->hook_entry[hook]); |
| 347 | 346 | ||
| @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, | |||
| 436 | e = (void *)e + e->next_offset; | 435 | e = (void *)e + e->next_offset; |
| 437 | } | 436 | } |
| 438 | } while (!hotdrop); | 437 | } while (!hotdrop); |
| 439 | 438 | xt_info_rdunlock_bh(); | |
| 440 | rcu_read_unlock_bh(); | ||
| 441 | 439 | ||
| 442 | #ifdef DEBUG_ALLOW_ALL | 440 | #ifdef DEBUG_ALLOW_ALL |
| 443 | return NF_ACCEPT; | 441 | return NF_ACCEPT; |
| @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, | |||
| 896 | 894 | ||
| 897 | /* Instead of clearing (by a previous call to memset()) | 895 | /* Instead of clearing (by a previous call to memset()) |
| 898 | * the counters and using adds, we set the counters | 896 | * the counters and using adds, we set the counters |
| 899 | * with data used by 'current' CPU | 897 | * with data used by 'current' CPU. |
| 900 | * We dont care about preemption here. | 898 | * |
| 899 | * Bottom half has to be disabled to prevent deadlock | ||
| 900 | * if new softirq were to run and call ipt_do_table | ||
| 901 | */ | 901 | */ |
| 902 | curcpu = raw_smp_processor_id(); | 902 | local_bh_disable(); |
| 903 | curcpu = smp_processor_id(); | ||
| 903 | 904 | ||
| 904 | i = 0; | 905 | i = 0; |
| 905 | IPT_ENTRY_ITERATE(t->entries[curcpu], | 906 | IPT_ENTRY_ITERATE(t->entries[curcpu], |
| @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, | |||
| 912 | if (cpu == curcpu) | 913 | if (cpu == curcpu) |
| 913 | continue; | 914 | continue; |
| 914 | i = 0; | 915 | i = 0; |
| 916 | xt_info_wrlock(cpu); | ||
| 915 | IPT_ENTRY_ITERATE(t->entries[cpu], | 917 | IPT_ENTRY_ITERATE(t->entries[cpu], |
| 916 | t->size, | 918 | t->size, |
| 917 | add_entry_to_counter, | 919 | add_entry_to_counter, |
| 918 | counters, | 920 | counters, |
| 919 | &i); | 921 | &i); |
| 922 | xt_info_wrunlock(cpu); | ||
| 920 | } | 923 | } |
| 921 | |||
| 922 | } | ||
| 923 | |||
| 924 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 925 | * and everything is OK. */ | ||
| 926 | static int | ||
| 927 | add_counter_to_entry(struct ipt_entry *e, | ||
| 928 | const struct xt_counters addme[], | ||
| 929 | unsigned int *i) | ||
| 930 | { | ||
| 931 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 932 | |||
| 933 | (*i)++; | ||
| 934 | return 0; | ||
| 935 | } | ||
| 936 | |||
| 937 | /* Take values from counters and add them back onto the current cpu */ | ||
| 938 | static void put_counters(struct xt_table_info *t, | ||
| 939 | const struct xt_counters counters[]) | ||
| 940 | { | ||
| 941 | unsigned int i, cpu; | ||
| 942 | |||
| 943 | local_bh_disable(); | ||
| 944 | cpu = smp_processor_id(); | ||
| 945 | i = 0; | ||
| 946 | IPT_ENTRY_ITERATE(t->entries[cpu], | ||
| 947 | t->size, | ||
| 948 | add_counter_to_entry, | ||
| 949 | counters, | ||
| 950 | &i); | ||
| 951 | local_bh_enable(); | 924 | local_bh_enable(); |
| 952 | } | 925 | } |
| 953 | 926 | ||
| 954 | |||
| 955 | static inline int | ||
| 956 | zero_entry_counter(struct ipt_entry *e, void *arg) | ||
| 957 | { | ||
| 958 | e->counters.bcnt = 0; | ||
| 959 | e->counters.pcnt = 0; | ||
| 960 | return 0; | ||
| 961 | } | ||
| 962 | |||
| 963 | static void | ||
| 964 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
| 965 | { | ||
| 966 | unsigned int cpu; | ||
| 967 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
| 968 | |||
| 969 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
| 970 | for_each_possible_cpu(cpu) { | ||
| 971 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
| 972 | IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
| 973 | zero_entry_counter, NULL); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | static struct xt_counters * alloc_counters(struct xt_table *table) | 927 | static struct xt_counters * alloc_counters(struct xt_table *table) |
| 978 | { | 928 | { |
| 979 | unsigned int countersize; | 929 | unsigned int countersize; |
| 980 | struct xt_counters *counters; | 930 | struct xt_counters *counters; |
| 981 | struct xt_table_info *private = table->private; | 931 | struct xt_table_info *private = table->private; |
| 982 | struct xt_table_info *info; | ||
| 983 | 932 | ||
| 984 | /* We need atomic snapshot of counters: rest doesn't change | 933 | /* We need atomic snapshot of counters: rest doesn't change |
| 985 | (other than comefrom, which userspace doesn't care | 934 | (other than comefrom, which userspace doesn't care |
| @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) | |||
| 988 | counters = vmalloc_node(countersize, numa_node_id()); | 937 | counters = vmalloc_node(countersize, numa_node_id()); |
| 989 | 938 | ||
| 990 | if (counters == NULL) | 939 | if (counters == NULL) |
| 991 | goto nomem; | 940 | return ERR_PTR(-ENOMEM); |
| 992 | 941 | ||
| 993 | info = xt_alloc_table_info(private->size); | 942 | get_counters(private, counters); |
| 994 | if (!info) | ||
| 995 | goto free_counters; | ||
| 996 | |||
| 997 | clone_counters(info, private); | ||
| 998 | |||
| 999 | mutex_lock(&table->lock); | ||
| 1000 | xt_table_entry_swap_rcu(private, info); | ||
| 1001 | synchronize_net(); /* Wait until smoke has cleared */ | ||
| 1002 | |||
| 1003 | get_counters(info, counters); | ||
| 1004 | put_counters(private, counters); | ||
| 1005 | mutex_unlock(&table->lock); | ||
| 1006 | |||
| 1007 | xt_free_table_info(info); | ||
| 1008 | 943 | ||
| 1009 | return counters; | 944 | return counters; |
| 1010 | |||
| 1011 | free_counters: | ||
| 1012 | vfree(counters); | ||
| 1013 | nomem: | ||
| 1014 | return ERR_PTR(-ENOMEM); | ||
| 1015 | } | 945 | } |
| 1016 | 946 | ||
| 1017 | static int | 947 | static int |
| @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1306 | (newinfo->number <= oldinfo->initial_entries)) | 1236 | (newinfo->number <= oldinfo->initial_entries)) |
| 1307 | module_put(t->me); | 1237 | module_put(t->me); |
| 1308 | 1238 | ||
| 1309 | /* Get the old counters. */ | 1239 | /* Get the old counters, and synchronize with replace */ |
| 1310 | get_counters(oldinfo, counters); | 1240 | get_counters(oldinfo, counters); |
| 1241 | |||
| 1311 | /* Decrease module usage counts and free resource */ | 1242 | /* Decrease module usage counts and free resource */ |
| 1312 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1243 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
| 1313 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1244 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, |
| @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1377 | return ret; | 1308 | return ret; |
| 1378 | } | 1309 | } |
| 1379 | 1310 | ||
| 1311 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 1312 | * and everything is OK. */ | ||
| 1313 | static int | ||
| 1314 | add_counter_to_entry(struct ipt_entry *e, | ||
| 1315 | const struct xt_counters addme[], | ||
| 1316 | unsigned int *i) | ||
| 1317 | { | ||
| 1318 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 1319 | |||
| 1320 | (*i)++; | ||
| 1321 | return 0; | ||
| 1322 | } | ||
| 1380 | 1323 | ||
| 1381 | static int | 1324 | static int |
| 1382 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) | 1325 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) |
| 1383 | { | 1326 | { |
| 1384 | unsigned int i; | 1327 | unsigned int i, curcpu; |
| 1385 | struct xt_counters_info tmp; | 1328 | struct xt_counters_info tmp; |
| 1386 | struct xt_counters *paddc; | 1329 | struct xt_counters *paddc; |
| 1387 | unsigned int num_counters; | 1330 | unsigned int num_counters; |
| @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
| 1437 | goto free; | 1380 | goto free; |
| 1438 | } | 1381 | } |
| 1439 | 1382 | ||
| 1440 | mutex_lock(&t->lock); | 1383 | local_bh_disable(); |
| 1441 | private = t->private; | 1384 | private = t->private; |
| 1442 | if (private->number != num_counters) { | 1385 | if (private->number != num_counters) { |
| 1443 | ret = -EINVAL; | 1386 | ret = -EINVAL; |
| 1444 | goto unlock_up_free; | 1387 | goto unlock_up_free; |
| 1445 | } | 1388 | } |
| 1446 | 1389 | ||
| 1447 | preempt_disable(); | ||
| 1448 | i = 0; | 1390 | i = 0; |
| 1449 | /* Choose the copy that is on our node */ | 1391 | /* Choose the copy that is on our node */ |
| 1450 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1392 | curcpu = smp_processor_id(); |
| 1393 | loc_cpu_entry = private->entries[curcpu]; | ||
| 1394 | xt_info_wrlock(curcpu); | ||
| 1451 | IPT_ENTRY_ITERATE(loc_cpu_entry, | 1395 | IPT_ENTRY_ITERATE(loc_cpu_entry, |
| 1452 | private->size, | 1396 | private->size, |
| 1453 | add_counter_to_entry, | 1397 | add_counter_to_entry, |
| 1454 | paddc, | 1398 | paddc, |
| 1455 | &i); | 1399 | &i); |
| 1456 | preempt_enable(); | 1400 | xt_info_wrunlock(curcpu); |
| 1457 | unlock_up_free: | 1401 | unlock_up_free: |
| 1458 | mutex_unlock(&t->lock); | 1402 | local_bh_enable(); |
| 1459 | xt_table_unlock(t); | 1403 | xt_table_unlock(t); |
| 1460 | module_put(t->me); | 1404 | module_put(t->me); |
| 1461 | free: | 1405 | free: |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 800ae8542471..219e165aea10 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
| @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, | |||
| 365 | 365 | ||
| 366 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 366 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
| 367 | 367 | ||
| 368 | rcu_read_lock_bh(); | 368 | xt_info_rdlock_bh(); |
| 369 | private = rcu_dereference(table->private); | 369 | private = table->private; |
| 370 | table_base = rcu_dereference(private->entries[smp_processor_id()]); | 370 | table_base = private->entries[smp_processor_id()]; |
| 371 | 371 | ||
| 372 | e = get_entry(table_base, private->hook_entry[hook]); | 372 | e = get_entry(table_base, private->hook_entry[hook]); |
| 373 | 373 | ||
| @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, | |||
| 466 | #ifdef CONFIG_NETFILTER_DEBUG | 466 | #ifdef CONFIG_NETFILTER_DEBUG |
| 467 | ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; | 467 | ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; |
| 468 | #endif | 468 | #endif |
| 469 | rcu_read_unlock_bh(); | 469 | xt_info_rdunlock_bh(); |
| 470 | 470 | ||
| 471 | #ifdef DEBUG_ALLOW_ALL | 471 | #ifdef DEBUG_ALLOW_ALL |
| 472 | return NF_ACCEPT; | 472 | return NF_ACCEPT; |
| @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, | |||
| 926 | /* Instead of clearing (by a previous call to memset()) | 926 | /* Instead of clearing (by a previous call to memset()) |
| 927 | * the counters and using adds, we set the counters | 927 | * the counters and using adds, we set the counters |
| 928 | * with data used by 'current' CPU | 928 | * with data used by 'current' CPU |
| 929 | * We dont care about preemption here. | 929 | * |
| 930 | * Bottom half has to be disabled to prevent deadlock | ||
| 931 | * if new softirq were to run and call ipt_do_table | ||
| 930 | */ | 932 | */ |
| 931 | curcpu = raw_smp_processor_id(); | 933 | local_bh_disable(); |
| 934 | curcpu = smp_processor_id(); | ||
| 932 | 935 | ||
| 933 | i = 0; | 936 | i = 0; |
| 934 | IP6T_ENTRY_ITERATE(t->entries[curcpu], | 937 | IP6T_ENTRY_ITERATE(t->entries[curcpu], |
| @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, | |||
| 941 | if (cpu == curcpu) | 944 | if (cpu == curcpu) |
| 942 | continue; | 945 | continue; |
| 943 | i = 0; | 946 | i = 0; |
| 947 | xt_info_wrlock(cpu); | ||
| 944 | IP6T_ENTRY_ITERATE(t->entries[cpu], | 948 | IP6T_ENTRY_ITERATE(t->entries[cpu], |
| 945 | t->size, | 949 | t->size, |
| 946 | add_entry_to_counter, | 950 | add_entry_to_counter, |
| 947 | counters, | 951 | counters, |
| 948 | &i); | 952 | &i); |
| 953 | xt_info_wrunlock(cpu); | ||
| 949 | } | 954 | } |
| 950 | } | ||
| 951 | |||
| 952 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 953 | * and everything is OK. */ | ||
| 954 | static int | ||
| 955 | add_counter_to_entry(struct ip6t_entry *e, | ||
| 956 | const struct xt_counters addme[], | ||
| 957 | unsigned int *i) | ||
| 958 | { | ||
| 959 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 960 | |||
| 961 | (*i)++; | ||
| 962 | return 0; | ||
| 963 | } | ||
| 964 | |||
| 965 | /* Take values from counters and add them back onto the current cpu */ | ||
| 966 | static void put_counters(struct xt_table_info *t, | ||
| 967 | const struct xt_counters counters[]) | ||
| 968 | { | ||
| 969 | unsigned int i, cpu; | ||
| 970 | |||
| 971 | local_bh_disable(); | ||
| 972 | cpu = smp_processor_id(); | ||
| 973 | i = 0; | ||
| 974 | IP6T_ENTRY_ITERATE(t->entries[cpu], | ||
| 975 | t->size, | ||
| 976 | add_counter_to_entry, | ||
| 977 | counters, | ||
| 978 | &i); | ||
| 979 | local_bh_enable(); | 955 | local_bh_enable(); |
| 980 | } | 956 | } |
| 981 | 957 | ||
| 982 | static inline int | ||
| 983 | zero_entry_counter(struct ip6t_entry *e, void *arg) | ||
| 984 | { | ||
| 985 | e->counters.bcnt = 0; | ||
| 986 | e->counters.pcnt = 0; | ||
| 987 | return 0; | ||
| 988 | } | ||
| 989 | |||
| 990 | static void | ||
| 991 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
| 992 | { | ||
| 993 | unsigned int cpu; | ||
| 994 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
| 995 | |||
| 996 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
| 997 | for_each_possible_cpu(cpu) { | ||
| 998 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
| 999 | IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
| 1000 | zero_entry_counter, NULL); | ||
| 1001 | } | ||
| 1002 | } | ||
| 1003 | |||
| 1004 | static struct xt_counters *alloc_counters(struct xt_table *table) | 958 | static struct xt_counters *alloc_counters(struct xt_table *table) |
| 1005 | { | 959 | { |
| 1006 | unsigned int countersize; | 960 | unsigned int countersize; |
| 1007 | struct xt_counters *counters; | 961 | struct xt_counters *counters; |
| 1008 | struct xt_table_info *private = table->private; | 962 | struct xt_table_info *private = table->private; |
| 1009 | struct xt_table_info *info; | ||
| 1010 | 963 | ||
| 1011 | /* We need atomic snapshot of counters: rest doesn't change | 964 | /* We need atomic snapshot of counters: rest doesn't change |
| 1012 | (other than comefrom, which userspace doesn't care | 965 | (other than comefrom, which userspace doesn't care |
| @@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) | |||
| 1015 | counters = vmalloc_node(countersize, numa_node_id()); | 968 | counters = vmalloc_node(countersize, numa_node_id()); |
| 1016 | 969 | ||
| 1017 | if (counters == NULL) | 970 | if (counters == NULL) |
| 1018 | goto nomem; | 971 | return ERR_PTR(-ENOMEM); |
| 1019 | 972 | ||
| 1020 | info = xt_alloc_table_info(private->size); | 973 | get_counters(private, counters); |
| 1021 | if (!info) | ||
| 1022 | goto free_counters; | ||
| 1023 | |||
| 1024 | clone_counters(info, private); | ||
| 1025 | |||
| 1026 | mutex_lock(&table->lock); | ||
| 1027 | xt_table_entry_swap_rcu(private, info); | ||
| 1028 | synchronize_net(); /* Wait until smoke has cleared */ | ||
| 1029 | |||
| 1030 | get_counters(info, counters); | ||
| 1031 | put_counters(private, counters); | ||
| 1032 | mutex_unlock(&table->lock); | ||
| 1033 | |||
| 1034 | xt_free_table_info(info); | ||
| 1035 | 974 | ||
| 1036 | return counters; | 975 | return counters; |
| 1037 | |||
| 1038 | free_counters: | ||
| 1039 | vfree(counters); | ||
| 1040 | nomem: | ||
| 1041 | return ERR_PTR(-ENOMEM); | ||
| 1042 | } | 976 | } |
| 1043 | 977 | ||
| 1044 | static int | 978 | static int |
| @@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1334 | (newinfo->number <= oldinfo->initial_entries)) | 1268 | (newinfo->number <= oldinfo->initial_entries)) |
| 1335 | module_put(t->me); | 1269 | module_put(t->me); |
| 1336 | 1270 | ||
| 1337 | /* Get the old counters. */ | 1271 | /* Get the old counters, and synchronize with replace */ |
| 1338 | get_counters(oldinfo, counters); | 1272 | get_counters(oldinfo, counters); |
| 1273 | |||
| 1339 | /* Decrease module usage counts and free resource */ | 1274 | /* Decrease module usage counts and free resource */ |
| 1340 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1275 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
| 1341 | IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1276 | IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, |
| @@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1405 | return ret; | 1340 | return ret; |
| 1406 | } | 1341 | } |
| 1407 | 1342 | ||
| 1343 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 1344 | * and everything is OK. */ | ||
| 1345 | static int | ||
| 1346 | add_counter_to_entry(struct ip6t_entry *e, | ||
| 1347 | const struct xt_counters addme[], | ||
| 1348 | unsigned int *i) | ||
| 1349 | { | ||
| 1350 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 1351 | |||
| 1352 | (*i)++; | ||
| 1353 | return 0; | ||
| 1354 | } | ||
| 1355 | |||
| 1408 | static int | 1356 | static int |
| 1409 | do_add_counters(struct net *net, void __user *user, unsigned int len, | 1357 | do_add_counters(struct net *net, void __user *user, unsigned int len, |
| 1410 | int compat) | 1358 | int compat) |
| 1411 | { | 1359 | { |
| 1412 | unsigned int i; | 1360 | unsigned int i, curcpu; |
| 1413 | struct xt_counters_info tmp; | 1361 | struct xt_counters_info tmp; |
| 1414 | struct xt_counters *paddc; | 1362 | struct xt_counters *paddc; |
| 1415 | unsigned int num_counters; | 1363 | unsigned int num_counters; |
| @@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
| 1465 | goto free; | 1413 | goto free; |
| 1466 | } | 1414 | } |
| 1467 | 1415 | ||
| 1468 | mutex_lock(&t->lock); | 1416 | |
| 1417 | local_bh_disable(); | ||
| 1469 | private = t->private; | 1418 | private = t->private; |
| 1470 | if (private->number != num_counters) { | 1419 | if (private->number != num_counters) { |
| 1471 | ret = -EINVAL; | 1420 | ret = -EINVAL; |
| 1472 | goto unlock_up_free; | 1421 | goto unlock_up_free; |
| 1473 | } | 1422 | } |
| 1474 | 1423 | ||
| 1475 | preempt_disable(); | ||
| 1476 | i = 0; | 1424 | i = 0; |
| 1477 | /* Choose the copy that is on our node */ | 1425 | /* Choose the copy that is on our node */ |
| 1478 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1426 | curcpu = smp_processor_id(); |
| 1427 | xt_info_wrlock(curcpu); | ||
| 1428 | loc_cpu_entry = private->entries[curcpu]; | ||
| 1479 | IP6T_ENTRY_ITERATE(loc_cpu_entry, | 1429 | IP6T_ENTRY_ITERATE(loc_cpu_entry, |
| 1480 | private->size, | 1430 | private->size, |
| 1481 | add_counter_to_entry, | 1431 | add_counter_to_entry, |
| 1482 | paddc, | 1432 | paddc, |
| 1483 | &i); | 1433 | &i); |
| 1484 | preempt_enable(); | 1434 | xt_info_wrunlock(curcpu); |
| 1435 | |||
| 1485 | unlock_up_free: | 1436 | unlock_up_free: |
| 1486 | mutex_unlock(&t->lock); | 1437 | local_bh_enable(); |
| 1487 | xt_table_unlock(t); | 1438 | xt_table_unlock(t); |
| 1488 | module_put(t->me); | 1439 | module_put(t->me); |
| 1489 | free: | 1440 | free: |
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 509a95621f9f..150e5cf62f85 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
| @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) | |||
| 625 | } | 625 | } |
| 626 | EXPORT_SYMBOL(xt_free_table_info); | 626 | EXPORT_SYMBOL(xt_free_table_info); |
| 627 | 627 | ||
| 628 | void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, | ||
| 629 | struct xt_table_info *newinfo) | ||
| 630 | { | ||
| 631 | unsigned int cpu; | ||
| 632 | |||
| 633 | for_each_possible_cpu(cpu) { | ||
| 634 | void *p = oldinfo->entries[cpu]; | ||
| 635 | rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); | ||
| 636 | newinfo->entries[cpu] = p; | ||
| 637 | } | ||
| 638 | |||
| 639 | } | ||
| 640 | EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); | ||
| 641 | |||
| 642 | /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ | 628 | /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ |
| 643 | struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, | 629 | struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, |
| 644 | const char *name) | 630 | const char *name) |
| @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) | |||
| 676 | EXPORT_SYMBOL_GPL(xt_compat_unlock); | 662 | EXPORT_SYMBOL_GPL(xt_compat_unlock); |
| 677 | #endif | 663 | #endif |
| 678 | 664 | ||
| 665 | DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); | ||
| 666 | EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); | ||
| 667 | |||
| 668 | |||
| 679 | struct xt_table_info * | 669 | struct xt_table_info * |
| 680 | xt_replace_table(struct xt_table *table, | 670 | xt_replace_table(struct xt_table *table, |
| 681 | unsigned int num_counters, | 671 | unsigned int num_counters, |
| 682 | struct xt_table_info *newinfo, | 672 | struct xt_table_info *newinfo, |
| 683 | int *error) | 673 | int *error) |
| 684 | { | 674 | { |
| 685 | struct xt_table_info *oldinfo, *private; | 675 | struct xt_table_info *private; |
| 686 | 676 | ||
| 687 | /* Do the substitution. */ | 677 | /* Do the substitution. */ |
| 688 | mutex_lock(&table->lock); | 678 | local_bh_disable(); |
| 689 | private = table->private; | 679 | private = table->private; |
| 680 | |||
| 690 | /* Check inside lock: is the old number correct? */ | 681 | /* Check inside lock: is the old number correct? */ |
| 691 | if (num_counters != private->number) { | 682 | if (num_counters != private->number) { |
| 692 | duprintf("num_counters != table->private->number (%u/%u)\n", | 683 | duprintf("num_counters != table->private->number (%u/%u)\n", |
| 693 | num_counters, private->number); | 684 | num_counters, private->number); |
| 694 | mutex_unlock(&table->lock); | 685 | local_bh_enable(); |
| 695 | *error = -EAGAIN; | 686 | *error = -EAGAIN; |
| 696 | return NULL; | 687 | return NULL; |
| 697 | } | 688 | } |
| 698 | oldinfo = private; | ||
| 699 | rcu_assign_pointer(table->private, newinfo); | ||
| 700 | newinfo->initial_entries = oldinfo->initial_entries; | ||
| 701 | mutex_unlock(&table->lock); | ||
| 702 | 689 | ||
| 703 | synchronize_net(); | 690 | table->private = newinfo; |
| 704 | return oldinfo; | 691 | newinfo->initial_entries = private->initial_entries; |
| 692 | |||
| 693 | /* | ||
| 694 | * Even though table entries have now been swapped, other CPU's | ||
| 695 | * may still be using the old entries. This is okay, because | ||
| 696 | * resynchronization happens because of the locking done | ||
| 697 | * during the get_counters() routine. | ||
| 698 | */ | ||
| 699 | local_bh_enable(); | ||
| 700 | |||
| 701 | return private; | ||
| 705 | } | 702 | } |
| 706 | EXPORT_SYMBOL_GPL(xt_replace_table); | 703 | EXPORT_SYMBOL_GPL(xt_replace_table); |
| 707 | 704 | ||
| @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, | |||
| 734 | 731 | ||
| 735 | /* Simplifies replace_table code. */ | 732 | /* Simplifies replace_table code. */ |
| 736 | table->private = bootstrap; | 733 | table->private = bootstrap; |
| 737 | mutex_init(&table->lock); | ||
| 738 | 734 | ||
| 739 | if (!xt_replace_table(table, 0, newinfo, &ret)) | 735 | if (!xt_replace_table(table, 0, newinfo, &ret)) |
| 740 | goto unlock; | 736 | goto unlock; |
| @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { | |||
| 1147 | 1143 | ||
| 1148 | static int __init xt_init(void) | 1144 | static int __init xt_init(void) |
| 1149 | { | 1145 | { |
| 1150 | int i, rv; | 1146 | unsigned int i; |
| 1147 | int rv; | ||
| 1148 | |||
| 1149 | for_each_possible_cpu(i) { | ||
| 1150 | struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); | ||
| 1151 | spin_lock_init(&lock->lock); | ||
| 1152 | lock->readers = 0; | ||
| 1153 | } | ||
| 1151 | 1154 | ||
| 1152 | xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); | 1155 | xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); |
| 1153 | if (!xt) | 1156 | if (!xt) |
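Putting the pieces together: after this patch a rule replace is just a pointer
assignment done with bottom halves disabled, and the get_counters() call that
follows doubles as the synchronization point, because taking each CPU's
xt_info lock in write mode cannot succeed until that CPU has left its
read-side section. A rough sketch of that ordering (condensed from
__do_replace() and xt_replace_table() above; get_counters() stands for the
per-protocol helper, and the cleanup_entry() walk plus error handling are
omitted):

```c
/* Sketch of the replace ordering only, not the exact kernel code. */
static int replace_sketch(struct xt_table *t, struct xt_table_info *newinfo,
			  unsigned int num_counters, struct xt_counters *counters)
{
	struct xt_table_info *oldinfo;
	int ret = 0;

	/* Swap t->private under local_bh_disable(); fails with -EAGAIN
	 * if num_counters no longer matches the installed ruleset. */
	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
	if (!oldinfo)
		return ret;

	/* Other CPUs may still be running on oldinfo here.  get_counters()
	 * takes every CPU's xt_info lock in write mode, so when it returns
	 * all readers of the old ruleset have finished. */
	get_counters(oldinfo, counters);

	/* Only now is it safe to clean up and free the old ruleset. */
	xt_free_table_info(oldinfo);
	return 0;
}
```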
