author		Eric Dumazet <dada1@cosmosbay.com>	2005-12-14 02:13:48 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2006-01-03 16:10:29 -0500
commit		318360646941d6f3d4c6e4ee99107392728a4079 (patch)
tree		26ab4ddc68f917dd4e8813ace504956620eba3a8
parent		df3271f3361b61ce02da0026b4a53e63bc2720cb (diff)
[NETFILTER] ip_tables: NUMA-aware allocation
Part of a performance problem with ip_tables is that memory allocation is not NUMA aware, but 'only' SMP aware (i.e. each CPU normally touches separate cache lines).

Even with small iptables rulesets, the cost of this misplacement can be high on common workloads.

Instead of using one vmalloc() area (located in the node of the iptables process), we now allocate an area for each possible CPU, using vmalloc_node(), so that memory should be allocated in the CPU's node if possible.

Port to arp_tables and ip6_tables by Harald Welte.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	net/ipv4/netfilter/arp_tables.c	| 175
-rw-r--r--	net/ipv4/netfilter/ip_tables.c	| 199
-rw-r--r--	net/ipv6/netfilter/ip6_tables.c	| 190
3 files changed, 382 insertions(+), 182 deletions(-)
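The heart of the change is the same in all three files below: the single SMP_ALIGN()-padded vmalloc() blob indexed through TABLE_OFFSET() is replaced by an array of per-CPU pointers, each copy allocated on the owning CPU's NUMA node. As an orientation aid before the diff, here is a condensed, hypothetical sketch of the alloc_table_info()/free_table_info() helpers the patch adds to each module; the generic struct table_info name stands in for the arpt/ipt/ip6t-specific variants, and for_each_cpu() is used in its 2005-era sense of walking every possible CPU:

#include <linux/mm.h>        /* PAGE_SIZE */
#include <linux/slab.h>      /* kzalloc, kmalloc_node, kfree */
#include <linux/vmalloc.h>   /* vmalloc_node, vfree */
#include <linux/cpumask.h>   /* for_each_cpu (possible CPUs in this era) */
#include <linux/topology.h>  /* cpu_to_node */

/* Stand-in for arpt/ipt/ip6t_table_info: the flat 'char entries[0]'
 * tail becomes one rule blob per CPU. */
struct table_info {
	unsigned int size;
	void *entries[NR_CPUS];
};

static void free_table_info(struct table_info *info)
{
	int cpu;

	for_each_cpu(cpu) {
		/* Mirror the allocation policy used below. */
		if (info->size <= PAGE_SIZE)
			kfree(info->entries[cpu]);
		else
			vfree(info->entries[cpu]);
	}
	kfree(info);
}

static struct table_info *alloc_table_info(unsigned int size)
{
	struct table_info *newinfo;
	int cpu;

	newinfo = kzalloc(sizeof(struct table_info), GFP_KERNEL);
	if (!newinfo)
		return NULL;

	newinfo->size = size;

	for_each_cpu(cpu) {
		/* Each CPU gets its own copy, placed on its own node. */
		if (size <= PAGE_SIZE)
			newinfo->entries[cpu] = kmalloc_node(size, GFP_KERNEL,
							     cpu_to_node(cpu));
		else
			newinfo->entries[cpu] = vmalloc_node(size,
							     cpu_to_node(cpu));
		if (newinfo->entries[cpu] == NULL) {
			free_table_info(newinfo);
			return NULL;
		}
	}

	return newinfo;
}

With this layout, the packet fast path simply dereferences table->private->entries[smp_processor_id()] under the table lock, and translate_table() keeps the per-CPU copies identical by memcpy()ing the translated blob (entry0) into every other CPU's area, as the hunks below show.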
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba6..bba156304695 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -68,19 +68,14 @@ struct arpt_table_info {
68 unsigned int initial_entries; 68 unsigned int initial_entries;
69 unsigned int hook_entry[NF_ARP_NUMHOOKS]; 69 unsigned int hook_entry[NF_ARP_NUMHOOKS];
70 unsigned int underflow[NF_ARP_NUMHOOKS]; 70 unsigned int underflow[NF_ARP_NUMHOOKS];
71 char entries[0] __attribute__((aligned(SMP_CACHE_BYTES))); 71 void *entries[NR_CPUS];
72}; 72};
73 73
74static LIST_HEAD(arpt_target); 74static LIST_HEAD(arpt_target);
75static LIST_HEAD(arpt_tables); 75static LIST_HEAD(arpt_tables);
76#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
76#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 77#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
77 78
78#ifdef CONFIG_SMP
79#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
80#else
81#define TABLE_OFFSET(t,p) 0
82#endif
83
84static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, 79static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
85 char *hdr_addr, int len) 80 char *hdr_addr, int len)
86{ 81{
@@ -269,9 +264,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
269 outdev = out ? out->name : nulldevname; 264 outdev = out ? out->name : nulldevname;
270 265
271 read_lock_bh(&table->lock); 266 read_lock_bh(&table->lock);
272 table_base = (void *)table->private->entries 267 table_base = (void *)table->private->entries[smp_processor_id()];
273 + TABLE_OFFSET(table->private,
274 smp_processor_id());
275 e = get_entry(table_base, table->private->hook_entry[hook]); 268 e = get_entry(table_base, table->private->hook_entry[hook]);
276 back = get_entry(table_base, table->private->underflow[hook]); 269 back = get_entry(table_base, table->private->underflow[hook]);
277 270
@@ -462,7 +455,8 @@ static inline int unconditional(const struct arpt_arp *arp)
462/* Figures out from what hook each rule can be called: returns 0 if 455/* Figures out from what hook each rule can be called: returns 0 if
463 * there are loops. Puts hook bitmask in comefrom. 456 * there are loops. Puts hook bitmask in comefrom.
464 */ 457 */
465static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks) 458static int mark_source_chains(struct arpt_table_info *newinfo,
459 unsigned int valid_hooks, void *entry0)
466{ 460{
467 unsigned int hook; 461 unsigned int hook;
468 462
@@ -472,7 +466,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
472 for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { 466 for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
473 unsigned int pos = newinfo->hook_entry[hook]; 467 unsigned int pos = newinfo->hook_entry[hook];
474 struct arpt_entry *e 468 struct arpt_entry *e
475 = (struct arpt_entry *)(newinfo->entries + pos); 469 = (struct arpt_entry *)(entry0 + pos);
476 470
477 if (!(valid_hooks & (1 << hook))) 471 if (!(valid_hooks & (1 << hook)))
478 continue; 472 continue;
@@ -514,13 +508,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
514 goto next; 508 goto next;
515 509
516 e = (struct arpt_entry *) 510 e = (struct arpt_entry *)
517 (newinfo->entries + pos); 511 (entry0 + pos);
518 } while (oldpos == pos + e->next_offset); 512 } while (oldpos == pos + e->next_offset);
519 513
520 /* Move along one */ 514 /* Move along one */
521 size = e->next_offset; 515 size = e->next_offset;
522 e = (struct arpt_entry *) 516 e = (struct arpt_entry *)
523 (newinfo->entries + pos + size); 517 (entry0 + pos + size);
524 e->counters.pcnt = pos; 518 e->counters.pcnt = pos;
525 pos += size; 519 pos += size;
526 } else { 520 } else {
@@ -537,7 +531,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
537 newpos = pos + e->next_offset; 531 newpos = pos + e->next_offset;
538 } 532 }
539 e = (struct arpt_entry *) 533 e = (struct arpt_entry *)
540 (newinfo->entries + newpos); 534 (entry0 + newpos);
541 e->counters.pcnt = pos; 535 e->counters.pcnt = pos;
542 pos = newpos; 536 pos = newpos;
543 } 537 }
@@ -689,6 +683,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
689static int translate_table(const char *name, 683static int translate_table(const char *name,
690 unsigned int valid_hooks, 684 unsigned int valid_hooks,
691 struct arpt_table_info *newinfo, 685 struct arpt_table_info *newinfo,
686 void *entry0,
692 unsigned int size, 687 unsigned int size,
693 unsigned int number, 688 unsigned int number,
694 const unsigned int *hook_entries, 689 const unsigned int *hook_entries,
@@ -710,11 +705,11 @@ static int translate_table(const char *name,
710 i = 0; 705 i = 0;
711 706
712 /* Walk through entries, checking offsets. */ 707 /* Walk through entries, checking offsets. */
713 ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 708 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
714 check_entry_size_and_hooks, 709 check_entry_size_and_hooks,
715 newinfo, 710 newinfo,
716 newinfo->entries, 711 entry0,
717 newinfo->entries + size, 712 entry0 + size,
718 hook_entries, underflows, &i); 713 hook_entries, underflows, &i);
719 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); 714 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
720 if (ret != 0) 715 if (ret != 0)
@@ -743,29 +738,26 @@ static int translate_table(const char *name,
743 } 738 }
744 } 739 }
745 740
746 if (!mark_source_chains(newinfo, valid_hooks)) { 741 if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
747 duprintf("Looping hook\n"); 742 duprintf("Looping hook\n");
748 return -ELOOP; 743 return -ELOOP;
749 } 744 }
750 745
751 /* Finally, each sanity check must pass */ 746 /* Finally, each sanity check must pass */
752 i = 0; 747 i = 0;
753 ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 748 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
754 check_entry, name, size, &i); 749 check_entry, name, size, &i);
755 750
756 if (ret != 0) { 751 if (ret != 0) {
757 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 752 ARPT_ENTRY_ITERATE(entry0, newinfo->size,
758 cleanup_entry, &i); 753 cleanup_entry, &i);
759 return ret; 754 return ret;
760 } 755 }
761 756
762 /* And one copy for every other CPU */ 757 /* And one copy for every other CPU */
763 for_each_cpu(i) { 758 for_each_cpu(i) {
764 if (i == 0) 759 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
765 continue; 760 memcpy(newinfo->entries[i], entry0, newinfo->size);
766 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
767 newinfo->entries,
768 SMP_ALIGN(newinfo->size));
769 } 761 }
770 762
771 return ret; 763 return ret;
@@ -807,15 +799,42 @@ static inline int add_entry_to_counter(const struct arpt_entry *e,
807 return 0; 799 return 0;
808} 800}
809 801
802static inline int set_entry_to_counter(const struct arpt_entry *e,
803 struct arpt_counters total[],
804 unsigned int *i)
805{
806 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
807
808 (*i)++;
809 return 0;
810}
811
810static void get_counters(const struct arpt_table_info *t, 812static void get_counters(const struct arpt_table_info *t,
811 struct arpt_counters counters[]) 813 struct arpt_counters counters[])
812{ 814{
813 unsigned int cpu; 815 unsigned int cpu;
814 unsigned int i; 816 unsigned int i;
817 unsigned int curcpu;
818
819 /* Instead of clearing (by a previous call to memset())
820 * the counters and using adds, we set the counters
821 * with data used by 'current' CPU
822	 * We don't care about preemption here.
823 */
824 curcpu = raw_smp_processor_id();
825
826 i = 0;
827 ARPT_ENTRY_ITERATE(t->entries[curcpu],
828 t->size,
829 set_entry_to_counter,
830 counters,
831 &i);
815 832
816 for_each_cpu(cpu) { 833 for_each_cpu(cpu) {
834 if (cpu == curcpu)
835 continue;
817 i = 0; 836 i = 0;
818 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 837 ARPT_ENTRY_ITERATE(t->entries[cpu],
819 t->size, 838 t->size,
820 add_entry_to_counter, 839 add_entry_to_counter,
821 counters, 840 counters,
@@ -831,6 +850,7 @@ static int copy_entries_to_user(unsigned int total_size,
831 struct arpt_entry *e; 850 struct arpt_entry *e;
832 struct arpt_counters *counters; 851 struct arpt_counters *counters;
833 int ret = 0; 852 int ret = 0;
853 void *loc_cpu_entry;
834 854
835 /* We need atomic snapshot of counters: rest doesn't change 855 /* We need atomic snapshot of counters: rest doesn't change
836 * (other than comefrom, which userspace doesn't care 856 * (other than comefrom, which userspace doesn't care
@@ -843,13 +863,13 @@ static int copy_entries_to_user(unsigned int total_size,
843 return -ENOMEM; 863 return -ENOMEM;
844 864
845 /* First, sum counters... */ 865 /* First, sum counters... */
846 memset(counters, 0, countersize);
847 write_lock_bh(&table->lock); 866 write_lock_bh(&table->lock);
848 get_counters(table->private, counters); 867 get_counters(table->private, counters);
849 write_unlock_bh(&table->lock); 868 write_unlock_bh(&table->lock);
850 869
851 /* ... then copy entire thing from CPU 0... */ 870 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
852 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 871 /* ... then copy entire thing ... */
872 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
853 ret = -EFAULT; 873 ret = -EFAULT;
854 goto free_counters; 874 goto free_counters;
855 } 875 }
@@ -859,7 +879,7 @@ static int copy_entries_to_user(unsigned int total_size,
859 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 879 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
860 struct arpt_entry_target *t; 880 struct arpt_entry_target *t;
861 881
862 e = (struct arpt_entry *)(table->private->entries + off); 882 e = (struct arpt_entry *)(loc_cpu_entry + off);
863 if (copy_to_user(userptr + off 883 if (copy_to_user(userptr + off
864 + offsetof(struct arpt_entry, counters), 884 + offsetof(struct arpt_entry, counters),
865 &counters[num], 885 &counters[num],
@@ -911,6 +931,47 @@ static int get_entries(const struct arpt_get_entries *entries,
911 return ret; 931 return ret;
912} 932}
913 933
934static void free_table_info(struct arpt_table_info *info)
935{
936 int cpu;
937 for_each_cpu(cpu) {
938 if (info->size <= PAGE_SIZE)
939 kfree(info->entries[cpu]);
940 else
941 vfree(info->entries[cpu]);
942 }
943 kfree(info);
944}
945
946static struct arpt_table_info *alloc_table_info(unsigned int size)
947{
948 struct arpt_table_info *newinfo;
949 int cpu;
950
951 newinfo = kzalloc(sizeof(struct arpt_table_info), GFP_KERNEL);
952 if (!newinfo)
953 return NULL;
954
955 newinfo->size = size;
956
957 for_each_cpu(cpu) {
958 if (size <= PAGE_SIZE)
959 newinfo->entries[cpu] = kmalloc_node(size,
960 GFP_KERNEL,
961 cpu_to_node(cpu));
962 else
963 newinfo->entries[cpu] = vmalloc_node(size,
964 cpu_to_node(cpu));
965
966 if (newinfo->entries[cpu] == NULL) {
967 free_table_info(newinfo);
968 return NULL;
969 }
970 }
971
972 return newinfo;
973}
974
914static int do_replace(void __user *user, unsigned int len) 975static int do_replace(void __user *user, unsigned int len)
915{ 976{
916 int ret; 977 int ret;
@@ -918,6 +979,7 @@ static int do_replace(void __user *user, unsigned int len)
918 struct arpt_table *t; 979 struct arpt_table *t;
919 struct arpt_table_info *newinfo, *oldinfo; 980 struct arpt_table_info *newinfo, *oldinfo;
920 struct arpt_counters *counters; 981 struct arpt_counters *counters;
982 void *loc_cpu_entry, *loc_cpu_old_entry;
921 983
922 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 984 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
923 return -EFAULT; 985 return -EFAULT;
@@ -930,13 +992,13 @@ static int do_replace(void __user *user, unsigned int len)
930 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 992 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
931 return -ENOMEM; 993 return -ENOMEM;
932 994
933 newinfo = vmalloc(sizeof(struct arpt_table_info) 995 newinfo = alloc_table_info(tmp.size);
934 + SMP_ALIGN(tmp.size) *
935 (highest_possible_processor_id()+1));
936 if (!newinfo) 996 if (!newinfo)
937 return -ENOMEM; 997 return -ENOMEM;
938 998
939 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 999 /* choose the copy that is on our node/cpu */
1000 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1001 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
940 tmp.size) != 0) { 1002 tmp.size) != 0) {
941 ret = -EFAULT; 1003 ret = -EFAULT;
942 goto free_newinfo; 1004 goto free_newinfo;
@@ -947,10 +1009,9 @@ static int do_replace(void __user *user, unsigned int len)
947 ret = -ENOMEM; 1009 ret = -ENOMEM;
948 goto free_newinfo; 1010 goto free_newinfo;
949 } 1011 }
950 memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
951 1012
952 ret = translate_table(tmp.name, tmp.valid_hooks, 1013 ret = translate_table(tmp.name, tmp.valid_hooks,
953 newinfo, tmp.size, tmp.num_entries, 1014 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
954 tmp.hook_entry, tmp.underflow); 1015 tmp.hook_entry, tmp.underflow);
955 if (ret != 0) 1016 if (ret != 0)
956 goto free_newinfo_counters; 1017 goto free_newinfo_counters;
@@ -989,8 +1050,10 @@ static int do_replace(void __user *user, unsigned int len)
989 /* Get the old counters. */ 1050 /* Get the old counters. */
990 get_counters(oldinfo, counters); 1051 get_counters(oldinfo, counters);
991 /* Decrease module usage counts and free resource */ 1052 /* Decrease module usage counts and free resource */
992 ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1053 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
993 vfree(oldinfo); 1054 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1055
1056 free_table_info(oldinfo);
994 if (copy_to_user(tmp.counters, counters, 1057 if (copy_to_user(tmp.counters, counters,
995 sizeof(struct arpt_counters) * tmp.num_counters) != 0) 1058 sizeof(struct arpt_counters) * tmp.num_counters) != 0)
996 ret = -EFAULT; 1059 ret = -EFAULT;
@@ -1002,11 +1065,11 @@ static int do_replace(void __user *user, unsigned int len)
1002 module_put(t->me); 1065 module_put(t->me);
1003 up(&arpt_mutex); 1066 up(&arpt_mutex);
1004 free_newinfo_counters_untrans: 1067 free_newinfo_counters_untrans:
1005 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); 1068 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1006 free_newinfo_counters: 1069 free_newinfo_counters:
1007 vfree(counters); 1070 vfree(counters);
1008 free_newinfo: 1071 free_newinfo:
1009 vfree(newinfo); 1072 free_table_info(newinfo);
1010 return ret; 1073 return ret;
1011} 1074}
1012 1075
@@ -1030,6 +1093,7 @@ static int do_add_counters(void __user *user, unsigned int len)
1030 struct arpt_counters_info tmp, *paddc; 1093 struct arpt_counters_info tmp, *paddc;
1031 struct arpt_table *t; 1094 struct arpt_table *t;
1032 int ret = 0; 1095 int ret = 0;
1096 void *loc_cpu_entry;
1033 1097
1034 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1098 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1035 return -EFAULT; 1099 return -EFAULT;
@@ -1059,7 +1123,9 @@ static int do_add_counters(void __user *user, unsigned int len)
1059 } 1123 }
1060 1124
1061 i = 0; 1125 i = 0;
1062 ARPT_ENTRY_ITERATE(t->private->entries, 1126 /* Choose the copy that is on our node */
1127 loc_cpu_entry = t->private->entries[smp_processor_id()];
1128 ARPT_ENTRY_ITERATE(loc_cpu_entry,
1063 t->private->size, 1129 t->private->size,
1064 add_counter_to_entry, 1130 add_counter_to_entry,
1065 paddc->counters, 1131 paddc->counters,
@@ -1220,30 +1286,32 @@ int arpt_register_table(struct arpt_table *table,
1220 struct arpt_table_info *newinfo; 1286 struct arpt_table_info *newinfo;
1221 static struct arpt_table_info bootstrap 1287 static struct arpt_table_info bootstrap
1222 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1288 = { 0, 0, 0, { 0 }, { 0 }, { } };
1289 void *loc_cpu_entry;
1223 1290
1224 newinfo = vmalloc(sizeof(struct arpt_table_info) 1291 newinfo = alloc_table_info(repl->size);
1225 + SMP_ALIGN(repl->size) *
1226 (highest_possible_processor_id()+1));
1227 if (!newinfo) { 1292 if (!newinfo) {
1228 ret = -ENOMEM; 1293 ret = -ENOMEM;
1229 return ret; 1294 return ret;
1230 } 1295 }
1231 memcpy(newinfo->entries, repl->entries, repl->size); 1296
1297 /* choose the copy on our node/cpu */
1298 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1299 memcpy(loc_cpu_entry, repl->entries, repl->size);
1232 1300
1233 ret = translate_table(table->name, table->valid_hooks, 1301 ret = translate_table(table->name, table->valid_hooks,
1234 newinfo, repl->size, 1302 newinfo, loc_cpu_entry, repl->size,
1235 repl->num_entries, 1303 repl->num_entries,
1236 repl->hook_entry, 1304 repl->hook_entry,
1237 repl->underflow); 1305 repl->underflow);
1238 duprintf("arpt_register_table: translate table gives %d\n", ret); 1306 duprintf("arpt_register_table: translate table gives %d\n", ret);
1239 if (ret != 0) { 1307 if (ret != 0) {
1240 vfree(newinfo); 1308 free_table_info(newinfo);
1241 return ret; 1309 return ret;
1242 } 1310 }
1243 1311
1244 ret = down_interruptible(&arpt_mutex); 1312 ret = down_interruptible(&arpt_mutex);
1245 if (ret != 0) { 1313 if (ret != 0) {
1246 vfree(newinfo); 1314 free_table_info(newinfo);
1247 return ret; 1315 return ret;
1248 } 1316 }
1249 1317
@@ -1272,20 +1340,23 @@ int arpt_register_table(struct arpt_table *table,
1272 return ret; 1340 return ret;
1273 1341
1274 free_unlock: 1342 free_unlock:
1275 vfree(newinfo); 1343 free_table_info(newinfo);
1276 goto unlock; 1344 goto unlock;
1277} 1345}
1278 1346
1279void arpt_unregister_table(struct arpt_table *table) 1347void arpt_unregister_table(struct arpt_table *table)
1280{ 1348{
1349 void *loc_cpu_entry;
1350
1281 down(&arpt_mutex); 1351 down(&arpt_mutex);
1282 LIST_DELETE(&arpt_tables, table); 1352 LIST_DELETE(&arpt_tables, table);
1283 up(&arpt_mutex); 1353 up(&arpt_mutex);
1284 1354
1285 /* Decrease module usage counts and free resources */ 1355 /* Decrease module usage counts and free resources */
1286 ARPT_ENTRY_ITERATE(table->private->entries, table->private->size, 1356 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1357 ARPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1287 cleanup_entry, NULL); 1358 cleanup_entry, NULL);
1288 vfree(table->private); 1359 free_table_info(table->private);
1289} 1360}
1290 1361
1291/* The built-in targets: standard (NULL) and error. */ 1362/* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e8..2a26d167e149 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -83,11 +83,6 @@ static DECLARE_MUTEX(ipt_mutex);
83 context stops packets coming through and allows user context to read 83 context stops packets coming through and allows user context to read
84 the counters or update the rules. 84 the counters or update the rules.
85 85
86 To be cache friendly on SMP, we arrange them like so:
87 [ n-entries ]
88 ... cache-align padding ...
89 [ n-entries ]
90
91 Hence the start of any table is given by get_table() below. */ 86 Hence the start of any table is given by get_table() below. */
92 87
93/* The table itself */ 88/* The table itself */
@@ -105,20 +100,15 @@ struct ipt_table_info
105 unsigned int underflow[NF_IP_NUMHOOKS]; 100 unsigned int underflow[NF_IP_NUMHOOKS];
106 101
107 /* ipt_entry tables: one per CPU */ 102 /* ipt_entry tables: one per CPU */
108 char entries[0] ____cacheline_aligned; 103 void *entries[NR_CPUS];
109}; 104};
110 105
111static LIST_HEAD(ipt_target); 106static LIST_HEAD(ipt_target);
112static LIST_HEAD(ipt_match); 107static LIST_HEAD(ipt_match);
113static LIST_HEAD(ipt_tables); 108static LIST_HEAD(ipt_tables);
109#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
114#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 110#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
115 111
116#ifdef CONFIG_SMP
117#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
118#else
119#define TABLE_OFFSET(t,p) 0
120#endif
121
122#if 0 112#if 0
123#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) 113#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) 114#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
@@ -290,8 +280,7 @@ ipt_do_table(struct sk_buff **pskb,
290 280
291 read_lock_bh(&table->lock); 281 read_lock_bh(&table->lock);
292 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 282 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
293 table_base = (void *)table->private->entries 283 table_base = (void *)table->private->entries[smp_processor_id()];
294 + TABLE_OFFSET(table->private, smp_processor_id());
295 e = get_entry(table_base, table->private->hook_entry[hook]); 284 e = get_entry(table_base, table->private->hook_entry[hook]);
296 285
297#ifdef CONFIG_NETFILTER_DEBUG 286#ifdef CONFIG_NETFILTER_DEBUG
@@ -563,7 +552,8 @@ unconditional(const struct ipt_ip *ip)
563/* Figures out from what hook each rule can be called: returns 0 if 552/* Figures out from what hook each rule can be called: returns 0 if
564 there are loops. Puts hook bitmask in comefrom. */ 553 there are loops. Puts hook bitmask in comefrom. */
565static int 554static int
566mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) 555mark_source_chains(struct ipt_table_info *newinfo,
556 unsigned int valid_hooks, void *entry0)
567{ 557{
568 unsigned int hook; 558 unsigned int hook;
569 559
@@ -572,7 +562,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
572 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { 562 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
573 unsigned int pos = newinfo->hook_entry[hook]; 563 unsigned int pos = newinfo->hook_entry[hook];
574 struct ipt_entry *e 564 struct ipt_entry *e
575 = (struct ipt_entry *)(newinfo->entries + pos); 565 = (struct ipt_entry *)(entry0 + pos);
576 566
577 if (!(valid_hooks & (1 << hook))) 567 if (!(valid_hooks & (1 << hook)))
578 continue; 568 continue;
@@ -622,13 +612,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
622 goto next; 612 goto next;
623 613
624 e = (struct ipt_entry *) 614 e = (struct ipt_entry *)
625 (newinfo->entries + pos); 615 (entry0 + pos);
626 } while (oldpos == pos + e->next_offset); 616 } while (oldpos == pos + e->next_offset);
627 617
628 /* Move along one */ 618 /* Move along one */
629 size = e->next_offset; 619 size = e->next_offset;
630 e = (struct ipt_entry *) 620 e = (struct ipt_entry *)
631 (newinfo->entries + pos + size); 621 (entry0 + pos + size);
632 e->counters.pcnt = pos; 622 e->counters.pcnt = pos;
633 pos += size; 623 pos += size;
634 } else { 624 } else {
@@ -645,7 +635,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
645 newpos = pos + e->next_offset; 635 newpos = pos + e->next_offset;
646 } 636 }
647 e = (struct ipt_entry *) 637 e = (struct ipt_entry *)
648 (newinfo->entries + newpos); 638 (entry0 + newpos);
649 e->counters.pcnt = pos; 639 e->counters.pcnt = pos;
650 pos = newpos; 640 pos = newpos;
651 } 641 }
@@ -855,6 +845,7 @@ static int
855translate_table(const char *name, 845translate_table(const char *name,
856 unsigned int valid_hooks, 846 unsigned int valid_hooks,
857 struct ipt_table_info *newinfo, 847 struct ipt_table_info *newinfo,
848 void *entry0,
858 unsigned int size, 849 unsigned int size,
859 unsigned int number, 850 unsigned int number,
860 const unsigned int *hook_entries, 851 const unsigned int *hook_entries,
@@ -875,11 +866,11 @@ translate_table(const char *name,
875 duprintf("translate_table: size %u\n", newinfo->size); 866 duprintf("translate_table: size %u\n", newinfo->size);
876 i = 0; 867 i = 0;
877 /* Walk through entries, checking offsets. */ 868 /* Walk through entries, checking offsets. */
878 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 869 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
879 check_entry_size_and_hooks, 870 check_entry_size_and_hooks,
880 newinfo, 871 newinfo,
881 newinfo->entries, 872 entry0,
882 newinfo->entries + size, 873 entry0 + size,
883 hook_entries, underflows, &i); 874 hook_entries, underflows, &i);
884 if (ret != 0) 875 if (ret != 0)
885 return ret; 876 return ret;
@@ -907,27 +898,24 @@ translate_table(const char *name,
907 } 898 }
908 } 899 }
909 900
910 if (!mark_source_chains(newinfo, valid_hooks)) 901 if (!mark_source_chains(newinfo, valid_hooks, entry0))
911 return -ELOOP; 902 return -ELOOP;
912 903
913 /* Finally, each sanity check must pass */ 904 /* Finally, each sanity check must pass */
914 i = 0; 905 i = 0;
915 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 906 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
916 check_entry, name, size, &i); 907 check_entry, name, size, &i);
917 908
918 if (ret != 0) { 909 if (ret != 0) {
919 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 910 IPT_ENTRY_ITERATE(entry0, newinfo->size,
920 cleanup_entry, &i); 911 cleanup_entry, &i);
921 return ret; 912 return ret;
922 } 913 }
923 914
924 /* And one copy for every other CPU */ 915 /* And one copy for every other CPU */
925 for_each_cpu(i) { 916 for_each_cpu(i) {
926 if (i == 0) 917 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
927 continue; 918 memcpy(newinfo->entries[i], entry0, newinfo->size);
928 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
929 newinfo->entries,
930 SMP_ALIGN(newinfo->size));
931 } 919 }
932 920
933 return ret; 921 return ret;
@@ -943,15 +931,12 @@ replace_table(struct ipt_table *table,
943 931
944#ifdef CONFIG_NETFILTER_DEBUG 932#ifdef CONFIG_NETFILTER_DEBUG
945 { 933 {
946 struct ipt_entry *table_base; 934 int cpu;
947 unsigned int i;
948 935
949 for_each_cpu(i) { 936 for_each_cpu(cpu) {
950 table_base = 937 struct ipt_entry *table_base = newinfo->entries[cpu];
951 (void *)newinfo->entries 938 if (table_base)
952 + TABLE_OFFSET(newinfo, i); 939 table_base->comefrom = 0xdead57ac;
953
954 table_base->comefrom = 0xdead57ac;
955 } 940 }
956 } 941 }
957#endif 942#endif
@@ -986,16 +971,44 @@ add_entry_to_counter(const struct ipt_entry *e,
986 return 0; 971 return 0;
987} 972}
988 973
974static inline int
975set_entry_to_counter(const struct ipt_entry *e,
976 struct ipt_counters total[],
977 unsigned int *i)
978{
979 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
980
981 (*i)++;
982 return 0;
983}
984
989static void 985static void
990get_counters(const struct ipt_table_info *t, 986get_counters(const struct ipt_table_info *t,
991 struct ipt_counters counters[]) 987 struct ipt_counters counters[])
992{ 988{
993 unsigned int cpu; 989 unsigned int cpu;
994 unsigned int i; 990 unsigned int i;
991 unsigned int curcpu;
992
993 /* Instead of clearing (by a previous call to memset())
994 * the counters and using adds, we set the counters
995 * with data used by 'current' CPU
996	 * We don't care about preemption here.
997 */
998 curcpu = raw_smp_processor_id();
999
1000 i = 0;
1001 IPT_ENTRY_ITERATE(t->entries[curcpu],
1002 t->size,
1003 set_entry_to_counter,
1004 counters,
1005 &i);
995 1006
996 for_each_cpu(cpu) { 1007 for_each_cpu(cpu) {
1008 if (cpu == curcpu)
1009 continue;
997 i = 0; 1010 i = 0;
998 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 1011 IPT_ENTRY_ITERATE(t->entries[cpu],
999 t->size, 1012 t->size,
1000 add_entry_to_counter, 1013 add_entry_to_counter,
1001 counters, 1014 counters,
@@ -1012,24 +1025,29 @@ copy_entries_to_user(unsigned int total_size,
1012 struct ipt_entry *e; 1025 struct ipt_entry *e;
1013 struct ipt_counters *counters; 1026 struct ipt_counters *counters;
1014 int ret = 0; 1027 int ret = 0;
1028 void *loc_cpu_entry;
1015 1029
1016 /* We need atomic snapshot of counters: rest doesn't change 1030 /* We need atomic snapshot of counters: rest doesn't change
1017 (other than comefrom, which userspace doesn't care 1031 (other than comefrom, which userspace doesn't care
1018 about). */ 1032 about). */
1019 countersize = sizeof(struct ipt_counters) * table->private->number; 1033 countersize = sizeof(struct ipt_counters) * table->private->number;
1020 counters = vmalloc(countersize); 1034 counters = vmalloc_node(countersize, numa_node_id());
1021 1035
1022 if (counters == NULL) 1036 if (counters == NULL)
1023 return -ENOMEM; 1037 return -ENOMEM;
1024 1038
1025 /* First, sum counters... */ 1039 /* First, sum counters... */
1026 memset(counters, 0, countersize);
1027 write_lock_bh(&table->lock); 1040 write_lock_bh(&table->lock);
1028 get_counters(table->private, counters); 1041 get_counters(table->private, counters);
1029 write_unlock_bh(&table->lock); 1042 write_unlock_bh(&table->lock);
1030 1043
1031 /* ... then copy entire thing from CPU 0... */ 1044 /* choose the copy that is on our node/cpu, ...
1032 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 1045 * This choice is lazy (because current thread is
1046 * allowed to migrate to another cpu)
1047 */
1048 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1049 /* ... then copy entire thing ... */
1050 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1033 ret = -EFAULT; 1051 ret = -EFAULT;
1034 goto free_counters; 1052 goto free_counters;
1035 } 1053 }
@@ -1041,7 +1059,7 @@ copy_entries_to_user(unsigned int total_size,
1041 struct ipt_entry_match *m; 1059 struct ipt_entry_match *m;
1042 struct ipt_entry_target *t; 1060 struct ipt_entry_target *t;
1043 1061
1044 e = (struct ipt_entry *)(table->private->entries + off); 1062 e = (struct ipt_entry *)(loc_cpu_entry + off);
1045 if (copy_to_user(userptr + off 1063 if (copy_to_user(userptr + off
1046 + offsetof(struct ipt_entry, counters), 1064 + offsetof(struct ipt_entry, counters),
1047 &counters[num], 1065 &counters[num],
@@ -1110,6 +1128,45 @@ get_entries(const struct ipt_get_entries *entries,
1110 return ret; 1128 return ret;
1111} 1129}
1112 1130
1131static void free_table_info(struct ipt_table_info *info)
1132{
1133 int cpu;
1134 for_each_cpu(cpu) {
1135 if (info->size <= PAGE_SIZE)
1136 kfree(info->entries[cpu]);
1137 else
1138 vfree(info->entries[cpu]);
1139 }
1140 kfree(info);
1141}
1142
1143static struct ipt_table_info *alloc_table_info(unsigned int size)
1144{
1145 struct ipt_table_info *newinfo;
1146 int cpu;
1147
1148 newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
1149 if (!newinfo)
1150 return NULL;
1151
1152 newinfo->size = size;
1153
1154 for_each_cpu(cpu) {
1155 if (size <= PAGE_SIZE)
1156 newinfo->entries[cpu] = kmalloc_node(size,
1157 GFP_KERNEL,
1158 cpu_to_node(cpu));
1159 else
1160 newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
1161 if (newinfo->entries[cpu] == 0) {
1162 free_table_info(newinfo);
1163 return NULL;
1164 }
1165 }
1166
1167 return newinfo;
1168}
1169
1113static int 1170static int
1114do_replace(void __user *user, unsigned int len) 1171do_replace(void __user *user, unsigned int len)
1115{ 1172{
@@ -1118,6 +1175,7 @@ do_replace(void __user *user, unsigned int len)
1118 struct ipt_table *t; 1175 struct ipt_table *t;
1119 struct ipt_table_info *newinfo, *oldinfo; 1176 struct ipt_table_info *newinfo, *oldinfo;
1120 struct ipt_counters *counters; 1177 struct ipt_counters *counters;
1178 void *loc_cpu_entry, *loc_cpu_old_entry;
1121 1179
1122 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1180 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1123 return -EFAULT; 1181 return -EFAULT;
@@ -1130,13 +1188,13 @@ do_replace(void __user *user, unsigned int len)
1130 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 1188 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1131 return -ENOMEM; 1189 return -ENOMEM;
1132 1190
1133 newinfo = vmalloc(sizeof(struct ipt_table_info) 1191 newinfo = alloc_table_info(tmp.size);
1134 + SMP_ALIGN(tmp.size) *
1135 (highest_possible_processor_id()+1));
1136 if (!newinfo) 1192 if (!newinfo)
1137 return -ENOMEM; 1193 return -ENOMEM;
1138 1194
1139	 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 1195	 /* choose the copy that is on our node/cpu */
1196 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1197 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1140 tmp.size) != 0) { 1198 tmp.size) != 0) {
1141 ret = -EFAULT; 1199 ret = -EFAULT;
1142 goto free_newinfo; 1200 goto free_newinfo;
@@ -1147,10 +1205,9 @@ do_replace(void __user *user, unsigned int len)
1147 ret = -ENOMEM; 1205 ret = -ENOMEM;
1148 goto free_newinfo; 1206 goto free_newinfo;
1149 } 1207 }
1150 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1151 1208
1152 ret = translate_table(tmp.name, tmp.valid_hooks, 1209 ret = translate_table(tmp.name, tmp.valid_hooks,
1153 newinfo, tmp.size, tmp.num_entries, 1210 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1154 tmp.hook_entry, tmp.underflow); 1211 tmp.hook_entry, tmp.underflow);
1155 if (ret != 0) 1212 if (ret != 0)
1156 goto free_newinfo_counters; 1213 goto free_newinfo_counters;
@@ -1189,8 +1246,9 @@ do_replace(void __user *user, unsigned int len)
1189 /* Get the old counters. */ 1246 /* Get the old counters. */
1190 get_counters(oldinfo, counters); 1247 get_counters(oldinfo, counters);
1191 /* Decrease module usage counts and free resource */ 1248 /* Decrease module usage counts and free resource */
1192 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1249 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1193 vfree(oldinfo); 1250 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1251 free_table_info(oldinfo);
1194 if (copy_to_user(tmp.counters, counters, 1252 if (copy_to_user(tmp.counters, counters,
1195 sizeof(struct ipt_counters) * tmp.num_counters) != 0) 1253 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1196 ret = -EFAULT; 1254 ret = -EFAULT;
@@ -1202,11 +1260,11 @@ do_replace(void __user *user, unsigned int len)
1202 module_put(t->me); 1260 module_put(t->me);
1203 up(&ipt_mutex); 1261 up(&ipt_mutex);
1204 free_newinfo_counters_untrans: 1262 free_newinfo_counters_untrans:
1205 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 1263 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1206 free_newinfo_counters: 1264 free_newinfo_counters:
1207 vfree(counters); 1265 vfree(counters);
1208 free_newinfo: 1266 free_newinfo:
1209 vfree(newinfo); 1267 free_table_info(newinfo);
1210 return ret; 1268 return ret;
1211} 1269}
1212 1270
@@ -1239,6 +1297,7 @@ do_add_counters(void __user *user, unsigned int len)
1239 struct ipt_counters_info tmp, *paddc; 1297 struct ipt_counters_info tmp, *paddc;
1240 struct ipt_table *t; 1298 struct ipt_table *t;
1241 int ret = 0; 1299 int ret = 0;
1300 void *loc_cpu_entry;
1242 1301
1243 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1302 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1244 return -EFAULT; 1303 return -EFAULT;
@@ -1246,7 +1305,7 @@ do_add_counters(void __user *user, unsigned int len)
1246 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) 1305 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1247 return -EINVAL; 1306 return -EINVAL;
1248 1307
1249 paddc = vmalloc(len); 1308 paddc = vmalloc_node(len, numa_node_id());
1250 if (!paddc) 1309 if (!paddc)
1251 return -ENOMEM; 1310 return -ENOMEM;
1252 1311
@@ -1268,7 +1327,9 @@ do_add_counters(void __user *user, unsigned int len)
1268 } 1327 }
1269 1328
1270 i = 0; 1329 i = 0;
1271 IPT_ENTRY_ITERATE(t->private->entries, 1330 /* Choose the copy that is on our node */
1331 loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
1332 IPT_ENTRY_ITERATE(loc_cpu_entry,
1272 t->private->size, 1333 t->private->size,
1273 add_counter_to_entry, 1334 add_counter_to_entry,
1274 paddc->counters, 1335 paddc->counters,
@@ -1460,28 +1521,31 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1460 struct ipt_table_info *newinfo; 1521 struct ipt_table_info *newinfo;
1461 static struct ipt_table_info bootstrap 1522 static struct ipt_table_info bootstrap
1462 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1523 = { 0, 0, 0, { 0 }, { 0 }, { } };
1524 void *loc_cpu_entry;
1463 1525
1464 newinfo = vmalloc(sizeof(struct ipt_table_info) 1526 newinfo = alloc_table_info(repl->size);
1465 + SMP_ALIGN(repl->size) *
1466 (highest_possible_processor_id()+1));
1467 if (!newinfo) 1527 if (!newinfo)
1468 return -ENOMEM; 1528 return -ENOMEM;
1469 1529
1470 memcpy(newinfo->entries, repl->entries, repl->size); 1530 /* choose the copy on our node/cpu
1531	 * but don't care about preemption
1532 */
1533 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1534 memcpy(loc_cpu_entry, repl->entries, repl->size);
1471 1535
1472 ret = translate_table(table->name, table->valid_hooks, 1536 ret = translate_table(table->name, table->valid_hooks,
1473 newinfo, repl->size, 1537 newinfo, loc_cpu_entry, repl->size,
1474 repl->num_entries, 1538 repl->num_entries,
1475 repl->hook_entry, 1539 repl->hook_entry,
1476 repl->underflow); 1540 repl->underflow);
1477 if (ret != 0) { 1541 if (ret != 0) {
1478 vfree(newinfo); 1542 free_table_info(newinfo);
1479 return ret; 1543 return ret;
1480 } 1544 }
1481 1545
1482 ret = down_interruptible(&ipt_mutex); 1546 ret = down_interruptible(&ipt_mutex);
1483 if (ret != 0) { 1547 if (ret != 0) {
1484 vfree(newinfo); 1548 free_table_info(newinfo);
1485 return ret; 1549 return ret;
1486 } 1550 }
1487 1551
@@ -1510,20 +1574,23 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1510 return ret; 1574 return ret;
1511 1575
1512 free_unlock: 1576 free_unlock:
1513 vfree(newinfo); 1577 free_table_info(newinfo);
1514 goto unlock; 1578 goto unlock;
1515} 1579}
1516 1580
1517void ipt_unregister_table(struct ipt_table *table) 1581void ipt_unregister_table(struct ipt_table *table)
1518{ 1582{
1583 void *loc_cpu_entry;
1584
1519 down(&ipt_mutex); 1585 down(&ipt_mutex);
1520 LIST_DELETE(&ipt_tables, table); 1586 LIST_DELETE(&ipt_tables, table);
1521 up(&ipt_mutex); 1587 up(&ipt_mutex);
1522 1588
1523 /* Decrease module usage counts and free resources */ 1589 /* Decrease module usage counts and free resources */
1524 IPT_ENTRY_ITERATE(table->private->entries, table->private->size, 1590 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1591 IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1525 cleanup_entry, NULL); 1592 cleanup_entry, NULL);
1526 vfree(table->private); 1593 free_table_info(table->private);
1527} 1594}
1528 1595
1529/* Returns 1 if the port is matched by the range, 0 otherwise */ 1596/* Returns 1 if the port is matched by the range, 0 otherwise */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 95d469271c4d..dd80020d8740 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -86,11 +86,6 @@ static DECLARE_MUTEX(ip6t_mutex);
86 context stops packets coming through and allows user context to read 86 context stops packets coming through and allows user context to read
87 the counters or update the rules. 87 the counters or update the rules.
88 88
89 To be cache friendly on SMP, we arrange them like so:
90 [ n-entries ]
91 ... cache-align padding ...
92 [ n-entries ]
93
94 Hence the start of any table is given by get_table() below. */ 89 Hence the start of any table is given by get_table() below. */
95 90
96/* The table itself */ 91/* The table itself */
@@ -108,20 +103,15 @@ struct ip6t_table_info
108 unsigned int underflow[NF_IP6_NUMHOOKS]; 103 unsigned int underflow[NF_IP6_NUMHOOKS];
109 104
110 /* ip6t_entry tables: one per CPU */ 105 /* ip6t_entry tables: one per CPU */
111 char entries[0] ____cacheline_aligned; 106 void *entries[NR_CPUS];
112}; 107};
113 108
114static LIST_HEAD(ip6t_target); 109static LIST_HEAD(ip6t_target);
115static LIST_HEAD(ip6t_match); 110static LIST_HEAD(ip6t_match);
116static LIST_HEAD(ip6t_tables); 111static LIST_HEAD(ip6t_tables);
112#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
117#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 113#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
118 114
119#ifdef CONFIG_SMP
120#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
121#else
122#define TABLE_OFFSET(t,p) 0
123#endif
124
125#if 0 115#if 0
126#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) 116#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
127#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) 117#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
@@ -376,8 +366,7 @@ ip6t_do_table(struct sk_buff **pskb,
376 366
377 read_lock_bh(&table->lock); 367 read_lock_bh(&table->lock);
378 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 368 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
379 table_base = (void *)table->private->entries 369 table_base = (void *)table->private->entries[smp_processor_id()];
380 + TABLE_OFFSET(table->private, smp_processor_id());
381 e = get_entry(table_base, table->private->hook_entry[hook]); 370 e = get_entry(table_base, table->private->hook_entry[hook]);
382 371
383#ifdef CONFIG_NETFILTER_DEBUG 372#ifdef CONFIG_NETFILTER_DEBUG
@@ -649,7 +638,8 @@ unconditional(const struct ip6t_ip6 *ipv6)
649/* Figures out from what hook each rule can be called: returns 0 if 638/* Figures out from what hook each rule can be called: returns 0 if
650 there are loops. Puts hook bitmask in comefrom. */ 639 there are loops. Puts hook bitmask in comefrom. */
651static int 640static int
652mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) 641mark_source_chains(struct ip6t_table_info *newinfo,
642 unsigned int valid_hooks, void *entry0)
653{ 643{
654 unsigned int hook; 644 unsigned int hook;
655 645
@@ -658,7 +648,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
658 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { 648 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
659 unsigned int pos = newinfo->hook_entry[hook]; 649 unsigned int pos = newinfo->hook_entry[hook];
660 struct ip6t_entry *e 650 struct ip6t_entry *e
661 = (struct ip6t_entry *)(newinfo->entries + pos); 651 = (struct ip6t_entry *)(entry0 + pos);
662 652
663 if (!(valid_hooks & (1 << hook))) 653 if (!(valid_hooks & (1 << hook)))
664 continue; 654 continue;
@@ -708,13 +698,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
708 goto next; 698 goto next;
709 699
710 e = (struct ip6t_entry *) 700 e = (struct ip6t_entry *)
711 (newinfo->entries + pos); 701 (entry0 + pos);
712 } while (oldpos == pos + e->next_offset); 702 } while (oldpos == pos + e->next_offset);
713 703
714 /* Move along one */ 704 /* Move along one */
715 size = e->next_offset; 705 size = e->next_offset;
716 e = (struct ip6t_entry *) 706 e = (struct ip6t_entry *)
717 (newinfo->entries + pos + size); 707 (entry0 + pos + size);
718 e->counters.pcnt = pos; 708 e->counters.pcnt = pos;
719 pos += size; 709 pos += size;
720 } else { 710 } else {
@@ -731,7 +721,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
731 newpos = pos + e->next_offset; 721 newpos = pos + e->next_offset;
732 } 722 }
733 e = (struct ip6t_entry *) 723 e = (struct ip6t_entry *)
734 (newinfo->entries + newpos); 724 (entry0 + newpos);
735 e->counters.pcnt = pos; 725 e->counters.pcnt = pos;
736 pos = newpos; 726 pos = newpos;
737 } 727 }
@@ -941,6 +931,7 @@ static int
941translate_table(const char *name, 931translate_table(const char *name,
942 unsigned int valid_hooks, 932 unsigned int valid_hooks,
943 struct ip6t_table_info *newinfo, 933 struct ip6t_table_info *newinfo,
934 void *entry0,
944 unsigned int size, 935 unsigned int size,
945 unsigned int number, 936 unsigned int number,
946 const unsigned int *hook_entries, 937 const unsigned int *hook_entries,
@@ -961,11 +952,11 @@ translate_table(const char *name,
961 duprintf("translate_table: size %u\n", newinfo->size); 952 duprintf("translate_table: size %u\n", newinfo->size);
962 i = 0; 953 i = 0;
963 /* Walk through entries, checking offsets. */ 954 /* Walk through entries, checking offsets. */
964 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 955 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
965 check_entry_size_and_hooks, 956 check_entry_size_and_hooks,
966 newinfo, 957 newinfo,
967 newinfo->entries, 958 entry0,
968 newinfo->entries + size, 959 entry0 + size,
969 hook_entries, underflows, &i); 960 hook_entries, underflows, &i);
970 if (ret != 0) 961 if (ret != 0)
971 return ret; 962 return ret;
@@ -993,27 +984,24 @@ translate_table(const char *name,
993 } 984 }
994 } 985 }
995 986
996 if (!mark_source_chains(newinfo, valid_hooks)) 987 if (!mark_source_chains(newinfo, valid_hooks, entry0))
997 return -ELOOP; 988 return -ELOOP;
998 989
999 /* Finally, each sanity check must pass */ 990 /* Finally, each sanity check must pass */
1000 i = 0; 991 i = 0;
1001 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 992 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
1002 check_entry, name, size, &i); 993 check_entry, name, size, &i);
1003 994
1004 if (ret != 0) { 995 if (ret != 0) {
1005 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 996 IP6T_ENTRY_ITERATE(entry0, newinfo->size,
1006 cleanup_entry, &i); 997 cleanup_entry, &i);
1007 return ret; 998 return ret;
1008 } 999 }
1009 1000
1010 /* And one copy for every other CPU */ 1001 /* And one copy for every other CPU */
1011 for_each_cpu(i) { 1002 for_each_cpu(i) {
1012 if (i == 0) 1003 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
1013 continue; 1004 memcpy(newinfo->entries[i], entry0, newinfo->size);
1014 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
1015 newinfo->entries,
1016 SMP_ALIGN(newinfo->size));
1017 } 1005 }
1018 1006
1019 return ret; 1007 return ret;
@@ -1029,15 +1017,12 @@ replace_table(struct ip6t_table *table,
1029 1017
1030#ifdef CONFIG_NETFILTER_DEBUG 1018#ifdef CONFIG_NETFILTER_DEBUG
1031 { 1019 {
1032 struct ip6t_entry *table_base; 1020 int cpu;
1033 unsigned int i;
1034 1021
1035 for_each_cpu(i) { 1022 for_each_cpu(cpu) {
1036 table_base = 1023 struct ip6t_entry *table_base = newinfo->entries[cpu];
1037 (void *)newinfo->entries 1024 if (table_base)
1038 + TABLE_OFFSET(newinfo, i); 1025 table_base->comefrom = 0xdead57ac;
1039
1040 table_base->comefrom = 0xdead57ac;
1041 } 1026 }
1042 } 1027 }
1043#endif 1028#endif
@@ -1072,16 +1057,44 @@ add_entry_to_counter(const struct ip6t_entry *e,
1072 return 0; 1057 return 0;
1073} 1058}
1074 1059
1060static inline int
1061set_entry_to_counter(const struct ip6t_entry *e,
1062 struct ip6t_counters total[],
1063 unsigned int *i)
1064{
1065 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1066
1067 (*i)++;
1068 return 0;
1069}
1070
1075static void 1071static void
1076get_counters(const struct ip6t_table_info *t, 1072get_counters(const struct ip6t_table_info *t,
1077 struct ip6t_counters counters[]) 1073 struct ip6t_counters counters[])
1078{ 1074{
1079 unsigned int cpu; 1075 unsigned int cpu;
1080 unsigned int i; 1076 unsigned int i;
1077 unsigned int curcpu;
1078
1079 /* Instead of clearing (by a previous call to memset())
1080 * the counters and using adds, we set the counters
1081 * with data used by 'current' CPU
1082	 * We don't care about preemption here.
1083 */
1084 curcpu = raw_smp_processor_id();
1085
1086 i = 0;
1087 IP6T_ENTRY_ITERATE(t->entries[curcpu],
1088 t->size,
1089 set_entry_to_counter,
1090 counters,
1091 &i);
1081 1092
1082 for_each_cpu(cpu) { 1093 for_each_cpu(cpu) {
1094 if (cpu == curcpu)
1095 continue;
1083 i = 0; 1096 i = 0;
1084 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 1097 IP6T_ENTRY_ITERATE(t->entries[cpu],
1085 t->size, 1098 t->size,
1086 add_entry_to_counter, 1099 add_entry_to_counter,
1087 counters, 1100 counters,
@@ -1098,6 +1111,7 @@ copy_entries_to_user(unsigned int total_size,
1098 struct ip6t_entry *e; 1111 struct ip6t_entry *e;
1099 struct ip6t_counters *counters; 1112 struct ip6t_counters *counters;
1100 int ret = 0; 1113 int ret = 0;
1114 void *loc_cpu_entry;
1101 1115
1102 /* We need atomic snapshot of counters: rest doesn't change 1116 /* We need atomic snapshot of counters: rest doesn't change
1103 (other than comefrom, which userspace doesn't care 1117 (other than comefrom, which userspace doesn't care
@@ -1109,13 +1123,13 @@ copy_entries_to_user(unsigned int total_size,
1109 return -ENOMEM; 1123 return -ENOMEM;
1110 1124
1111 /* First, sum counters... */ 1125 /* First, sum counters... */
1112 memset(counters, 0, countersize);
1113 write_lock_bh(&table->lock); 1126 write_lock_bh(&table->lock);
1114 get_counters(table->private, counters); 1127 get_counters(table->private, counters);
1115 write_unlock_bh(&table->lock); 1128 write_unlock_bh(&table->lock);
1116 1129
1117	 /* ... then copy entire thing from CPU 0... */ 1130	 /* choose the copy that is on our node/cpu */
1118 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 1131 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1132 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1119 ret = -EFAULT; 1133 ret = -EFAULT;
1120 goto free_counters; 1134 goto free_counters;
1121 } 1135 }
@@ -1127,7 +1141,7 @@ copy_entries_to_user(unsigned int total_size,
1127 struct ip6t_entry_match *m; 1141 struct ip6t_entry_match *m;
1128 struct ip6t_entry_target *t; 1142 struct ip6t_entry_target *t;
1129 1143
1130 e = (struct ip6t_entry *)(table->private->entries + off); 1144 e = (struct ip6t_entry *)(loc_cpu_entry + off);
1131 if (copy_to_user(userptr + off 1145 if (copy_to_user(userptr + off
1132 + offsetof(struct ip6t_entry, counters), 1146 + offsetof(struct ip6t_entry, counters),
1133 &counters[num], 1147 &counters[num],
@@ -1196,6 +1210,46 @@ get_entries(const struct ip6t_get_entries *entries,
1196 return ret; 1210 return ret;
1197} 1211}
1198 1212
1213static void free_table_info(struct ip6t_table_info *info)
1214{
1215 int cpu;
1216 for_each_cpu(cpu) {
1217 if (info->size <= PAGE_SIZE)
1218 kfree(info->entries[cpu]);
1219 else
1220 vfree(info->entries[cpu]);
1221 }
1222 kfree(info);
1223}
1224
1225static struct ip6t_table_info *alloc_table_info(unsigned int size)
1226{
1227 struct ip6t_table_info *newinfo;
1228 int cpu;
1229
1230 newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL);
1231 if (!newinfo)
1232 return NULL;
1233
1234 newinfo->size = size;
1235
1236 for_each_cpu(cpu) {
1237 if (size <= PAGE_SIZE)
1238 newinfo->entries[cpu] = kmalloc_node(size,
1239 GFP_KERNEL,
1240 cpu_to_node(cpu));
1241 else
1242 newinfo->entries[cpu] = vmalloc_node(size,
1243 cpu_to_node(cpu));
1244 if (newinfo->entries[cpu] == NULL) {
1245 free_table_info(newinfo);
1246 return NULL;
1247 }
1248 }
1249
1250 return newinfo;
1251}
1252
1199static int 1253static int
1200do_replace(void __user *user, unsigned int len) 1254do_replace(void __user *user, unsigned int len)
1201{ 1255{
@@ -1204,6 +1258,7 @@ do_replace(void __user *user, unsigned int len)
1204 struct ip6t_table *t; 1258 struct ip6t_table *t;
1205 struct ip6t_table_info *newinfo, *oldinfo; 1259 struct ip6t_table_info *newinfo, *oldinfo;
1206 struct ip6t_counters *counters; 1260 struct ip6t_counters *counters;
1261 void *loc_cpu_entry, *loc_cpu_old_entry;
1207 1262
1208 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1263 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1209 return -EFAULT; 1264 return -EFAULT;
@@ -1212,13 +1267,13 @@ do_replace(void __user *user, unsigned int len)
1212 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 1267 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1213 return -ENOMEM; 1268 return -ENOMEM;
1214 1269
1215 newinfo = vmalloc(sizeof(struct ip6t_table_info) 1270 newinfo = alloc_table_info(tmp.size);
1216 + SMP_ALIGN(tmp.size) *
1217 (highest_possible_processor_id()+1));
1218 if (!newinfo) 1271 if (!newinfo)
1219 return -ENOMEM; 1272 return -ENOMEM;
1220 1273
1221 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 1274 /* choose the copy that is on our node/cpu */
1275 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1276 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1222 tmp.size) != 0) { 1277 tmp.size) != 0) {
1223 ret = -EFAULT; 1278 ret = -EFAULT;
1224 goto free_newinfo; 1279 goto free_newinfo;
@@ -1229,10 +1284,9 @@ do_replace(void __user *user, unsigned int len)
1229 ret = -ENOMEM; 1284 ret = -ENOMEM;
1230 goto free_newinfo; 1285 goto free_newinfo;
1231 } 1286 }
1232 memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1233 1287
1234 ret = translate_table(tmp.name, tmp.valid_hooks, 1288 ret = translate_table(tmp.name, tmp.valid_hooks,
1235 newinfo, tmp.size, tmp.num_entries, 1289 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1236 tmp.hook_entry, tmp.underflow); 1290 tmp.hook_entry, tmp.underflow);
1237 if (ret != 0) 1291 if (ret != 0)
1238 goto free_newinfo_counters; 1292 goto free_newinfo_counters;
@@ -1271,8 +1325,9 @@ do_replace(void __user *user, unsigned int len)
1271 /* Get the old counters. */ 1325 /* Get the old counters. */
1272 get_counters(oldinfo, counters); 1326 get_counters(oldinfo, counters);
1273 /* Decrease module usage counts and free resource */ 1327 /* Decrease module usage counts and free resource */
1274 IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1328 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1275 vfree(oldinfo); 1329 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1330 free_table_info(oldinfo);
1276 if (copy_to_user(tmp.counters, counters, 1331 if (copy_to_user(tmp.counters, counters,
1277 sizeof(struct ip6t_counters) * tmp.num_counters) != 0) 1332 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
1278 ret = -EFAULT; 1333 ret = -EFAULT;
@@ -1284,11 +1339,11 @@ do_replace(void __user *user, unsigned int len)
1284 module_put(t->me); 1339 module_put(t->me);
1285 up(&ip6t_mutex); 1340 up(&ip6t_mutex);
1286 free_newinfo_counters_untrans: 1341 free_newinfo_counters_untrans:
1287 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 1342 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1288 free_newinfo_counters: 1343 free_newinfo_counters:
1289 vfree(counters); 1344 vfree(counters);
1290 free_newinfo: 1345 free_newinfo:
1291 vfree(newinfo); 1346 free_table_info(newinfo);
1292 return ret; 1347 return ret;
1293} 1348}
1294 1349
@@ -1321,6 +1376,7 @@ do_add_counters(void __user *user, unsigned int len)
1321 struct ip6t_counters_info tmp, *paddc; 1376 struct ip6t_counters_info tmp, *paddc;
1322 struct ip6t_table *t; 1377 struct ip6t_table *t;
1323 int ret = 0; 1378 int ret = 0;
1379 void *loc_cpu_entry;
1324 1380
1325 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1381 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1326 return -EFAULT; 1382 return -EFAULT;
@@ -1350,7 +1406,9 @@ do_add_counters(void __user *user, unsigned int len)
1350 } 1406 }
1351 1407
1352 i = 0; 1408 i = 0;
1353 IP6T_ENTRY_ITERATE(t->private->entries, 1409 /* Choose the copy that is on our node */
1410 loc_cpu_entry = t->private->entries[smp_processor_id()];
1411 IP6T_ENTRY_ITERATE(loc_cpu_entry,
1354 t->private->size, 1412 t->private->size,
1355 add_counter_to_entry, 1413 add_counter_to_entry,
1356 paddc->counters, 1414 paddc->counters,
@@ -1543,28 +1601,29 @@ int ip6t_register_table(struct ip6t_table *table,
1543 struct ip6t_table_info *newinfo; 1601 struct ip6t_table_info *newinfo;
1544 static struct ip6t_table_info bootstrap 1602 static struct ip6t_table_info bootstrap
1545 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1603 = { 0, 0, 0, { 0 }, { 0 }, { } };
1604 void *loc_cpu_entry;
1546 1605
1547 newinfo = vmalloc(sizeof(struct ip6t_table_info) 1606 newinfo = alloc_table_info(repl->size);
1548 + SMP_ALIGN(repl->size) *
1549 (highest_possible_processor_id()+1));
1550 if (!newinfo) 1607 if (!newinfo)
1551 return -ENOMEM; 1608 return -ENOMEM;
1552 1609
1553 memcpy(newinfo->entries, repl->entries, repl->size); 1610 /* choose the copy on our node/cpu */
1611 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1612 memcpy(loc_cpu_entry, repl->entries, repl->size);
1554 1613
1555 ret = translate_table(table->name, table->valid_hooks, 1614 ret = translate_table(table->name, table->valid_hooks,
1556 newinfo, repl->size, 1615 newinfo, loc_cpu_entry, repl->size,
1557 repl->num_entries, 1616 repl->num_entries,
1558 repl->hook_entry, 1617 repl->hook_entry,
1559 repl->underflow); 1618 repl->underflow);
1560 if (ret != 0) { 1619 if (ret != 0) {
1561 vfree(newinfo); 1620 free_table_info(newinfo);
1562 return ret; 1621 return ret;
1563 } 1622 }
1564 1623
1565 ret = down_interruptible(&ip6t_mutex); 1624 ret = down_interruptible(&ip6t_mutex);
1566 if (ret != 0) { 1625 if (ret != 0) {
1567 vfree(newinfo); 1626 free_table_info(newinfo);
1568 return ret; 1627 return ret;
1569 } 1628 }
1570 1629
@@ -1593,20 +1652,23 @@ int ip6t_register_table(struct ip6t_table *table,
1593 return ret; 1652 return ret;
1594 1653
1595 free_unlock: 1654 free_unlock:
1596 vfree(newinfo); 1655 free_table_info(newinfo);
1597 goto unlock; 1656 goto unlock;
1598} 1657}
1599 1658
1600void ip6t_unregister_table(struct ip6t_table *table) 1659void ip6t_unregister_table(struct ip6t_table *table)
1601{ 1660{
1661 void *loc_cpu_entry;
1662
1602 down(&ip6t_mutex); 1663 down(&ip6t_mutex);
1603 LIST_DELETE(&ip6t_tables, table); 1664 LIST_DELETE(&ip6t_tables, table);
1604 up(&ip6t_mutex); 1665 up(&ip6t_mutex);
1605 1666
1606 /* Decrease module usage counts and free resources */ 1667 /* Decrease module usage counts and free resources */
1607 IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, 1668 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1669 IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1608 cleanup_entry, NULL); 1670 cleanup_entry, NULL);
1609 vfree(table->private); 1671 free_table_info(table->private);
1610} 1672}
1611 1673
1612/* Returns 1 if the port is matched by the range, 0 otherwise */ 1674/* Returns 1 if the port is matched by the range, 0 otherwise */
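A second pattern repeated in all three files above is the counter snapshot: get_counters() no longer memset()s the user-visible array and ADD_COUNTER()s every CPU; it seeds the array from whichever CPU it happens to run on (the new SET_COUNTER()/set_entry_to_counter()) and only adds the remaining CPUs. A hypothetical, simplified rendering of that idea follows; a flat per-CPU counter array stands in for the ARPT/IPT/IP6T_ENTRY_ITERATE() walk over the rule blob, and the macro and field names mirror the patch:

#include <linux/smp.h>       /* raw_smp_processor_id */
#include <linux/cpumask.h>   /* for_each_cpu (possible CPUs in this era) */
#include <linux/types.h>

/* Stand-in for struct arpt/ipt/ip6t_counters. */
struct counters_sketch {
	u_int64_t pcnt, bcnt;
};

#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

static void get_counters_sketch(struct counters_sketch *percpu[NR_CPUS],
				unsigned int nentries,
				struct counters_sketch snapshot[])
{
	unsigned int curcpu = raw_smp_processor_id();
	unsigned int cpu, i;

	/* Seed the snapshot from the local CPU: no memset() needed. */
	for (i = 0; i < nentries; i++)
		SET_COUNTER(snapshot[i],
			    percpu[curcpu][i].bcnt, percpu[curcpu][i].pcnt);

	/* Then accumulate every other CPU's private copy. */
	for_each_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		for (i = 0; i < nentries; i++)
			ADD_COUNTER(snapshot[i],
				    percpu[cpu][i].bcnt, percpu[cpu][i].pcnt);
	}
}

In the patch itself the callers still take write_lock_bh(&table->lock) around get_counters(), so dropping the memset() removes one pass over the counter array but changes nothing about locking.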