-rw-r--r--   net/ipv4/netfilter/arp_tables.c   175
-rw-r--r--   net/ipv4/netfilter/ip_tables.c    199
-rw-r--r--   net/ipv6/netfilter/ip6_tables.c   190
3 files changed, 382 insertions(+), 182 deletions(-)
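The pattern is the same in all three files below: the flat per-CPU blob at the end of the table_info structure (char entries[0], strided by SMP_ALIGN(size) per CPU) is replaced by an entries[NR_CPUS] pointer array, and each CPU's copy of the ruleset is allocated separately on that CPU's NUMA node, via kmalloc_node() for tables that fit in a page and vmalloc_node() otherwise. What follows is a minimal userspace sketch of that allocation scheme, not the kernel code itself: NCPUS, malloc() and calloc() stand in for NR_CPUS/for_each_cpu() and the node-aware kernel allocators.

    #include <stdlib.h>
    #include <string.h>

    #define NCPUS 4                       /* stand-in for NR_CPUS / for_each_cpu() */

    struct table_info {
            unsigned int size;
            void *entries[NCPUS];         /* one ruleset copy per CPU */
    };

    static void free_table_info(struct table_info *info)
    {
            for (int cpu = 0; cpu < NCPUS; cpu++)
                    free(info->entries[cpu]);  /* kernel: kfree()/vfree() chosen by size */
            free(info);
    }

    static struct table_info *alloc_table_info(unsigned int size)
    {
            struct table_info *newinfo = calloc(1, sizeof(*newinfo));

            if (!newinfo)
                    return NULL;
            newinfo->size = size;
            for (int cpu = 0; cpu < NCPUS; cpu++) {
                    /* kernel: kmalloc_node()/vmalloc_node(), so each copy
                     * lives on the memory node of the CPU that traverses it */
                    newinfo->entries[cpu] = malloc(size);
                    if (!newinfo->entries[cpu]) {
                            free_table_info(newinfo);
                            return NULL;
                    }
            }
            return newinfo;
    }

    /* after translate_table() validates one copy (entry0), replicate it */
    static void replicate_entries(struct table_info *info, const void *entry0)
    {
            for (int cpu = 0; cpu < NCPUS; cpu++)
                    if (info->entries[cpu] && info->entries[cpu] != entry0)
                            memcpy(info->entries[cpu], entry0, info->size);
    }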
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba6..bba156304695 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -68,19 +68,14 @@ struct arpt_table_info {
 	unsigned int initial_entries;
 	unsigned int hook_entry[NF_ARP_NUMHOOKS];
 	unsigned int underflow[NF_ARP_NUMHOOKS];
-	char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
+	void *entries[NR_CPUS];
 };
 
 static LIST_HEAD(arpt_target);
 static LIST_HEAD(arpt_tables);
+#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
 
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      char *hdr_addr, int len)
 {
@@ -269,9 +264,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	outdev = out ? out->name : nulldevname;
 
 	read_lock_bh(&table->lock);
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private,
-			       smp_processor_id());
+	table_base = (void *)table->private->entries[smp_processor_id()];
 	e = get_entry(table_base, table->private->hook_entry[hook]);
 	back = get_entry(table_base, table->private->underflow[hook]);
 
@@ -462,7 +455,8 @@ static inline int unconditional(const struct arpt_arp *arp)
 /* Figures out from what hook each rule can be called: returns 0 if
  * there are loops. Puts hook bitmask in comefrom.
  */
-static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
+static int mark_source_chains(struct arpt_table_info *newinfo,
+			      unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -472,7 +466,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 	for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct arpt_entry *e
-			= (struct arpt_entry *)(newinfo->entries + pos);
+			= (struct arpt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -514,13 +508,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 					goto next;
 
 				e = (struct arpt_entry *)
-					(newinfo->entries + pos);
+					(entry0 + pos);
 			} while (oldpos == pos + e->next_offset);
 
 			/* Move along one */
 			size = e->next_offset;
 			e = (struct arpt_entry *)
-				(newinfo->entries + pos + size);
+				(entry0 + pos + size);
 			e->counters.pcnt = pos;
 			pos += size;
 		} else {
@@ -537,7 +531,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 				newpos = pos + e->next_offset;
 			}
 			e = (struct arpt_entry *)
-				(newinfo->entries + newpos);
+				(entry0 + newpos);
 			e->counters.pcnt = pos;
 			pos = newpos;
 		}
@@ -689,6 +683,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
 static int translate_table(const char *name,
 			   unsigned int valid_hooks,
 			   struct arpt_table_info *newinfo,
+			   void *entry0,
 			   unsigned int size,
 			   unsigned int number,
 			   const unsigned int *hook_entries,
@@ -710,11 +705,11 @@ static int translate_table(const char *name,
 	i = 0;
 
 	/* Walk through entries, checking offsets. */
-	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				 check_entry_size_and_hooks,
 				 newinfo,
-				 newinfo->entries,
-				 newinfo->entries + size,
+				 entry0,
+				 entry0 + size,
 				 hook_entries, underflows, &i);
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -743,29 +738,26 @@ static int translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks)) {
+	if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
 		duprintf("Looping hook\n");
 		return -ELOOP;
 	}
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				 check_entry, name, size, &i);
 
 	if (ret != 0) {
-		ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				   cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
@@ -807,15 +799,42 @@ static inline int add_entry_to_counter(const struct arpt_entry *e,
 	return 0;
 }
 
+static inline int set_entry_to_counter(const struct arpt_entry *e,
+				       struct arpt_counters total[],
+				       unsigned int *i)
+{
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static void get_counters(const struct arpt_table_info *t,
 			 struct arpt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We dont care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[curcpu],
+			   t->size,
+			   set_entry_to_counter,
+			   counters,
+			   &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		ARPT_ENTRY_ITERATE(t->entries[cpu],
 				   t->size,
 				   add_entry_to_counter,
 				   counters,
@@ -831,6 +850,7 @@ static int copy_entries_to_user(unsigned int total_size,
 	struct arpt_entry *e;
 	struct arpt_counters *counters;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -843,13 +863,13 @@ static int copy_entries_to_user(unsigned int total_size,
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
 	get_counters(table->private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+	/* ... then copy entire thing ... */
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -859,7 +879,7 @@ static int copy_entries_to_user(unsigned int total_size,
 	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 		struct arpt_entry_target *t;
 
-		e = (struct arpt_entry *)(table->private->entries + off);
+		e = (struct arpt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct arpt_entry, counters),
 				 &counters[num],
@@ -911,6 +931,47 @@ static int get_entries(const struct arpt_get_entries *entries,
 	return ret;
 }
 
+static void free_table_info(struct arpt_table_info *info)
+{
+	int cpu;
+	for_each_cpu(cpu) {
+		if (info->size <= PAGE_SIZE)
+			kfree(info->entries[cpu]);
+		else
+			vfree(info->entries[cpu]);
+	}
+	kfree(info);
+}
+
+static struct arpt_table_info *alloc_table_info(unsigned int size)
+{
+	struct arpt_table_info *newinfo;
+	int cpu;
+
+	newinfo = kzalloc(sizeof(struct arpt_table_info), GFP_KERNEL);
+	if (!newinfo)
+		return NULL;
+
+	newinfo->size = size;
+
+	for_each_cpu(cpu) {
+		if (size <= PAGE_SIZE)
+			newinfo->entries[cpu] = kmalloc_node(size,
+							     GFP_KERNEL,
+							     cpu_to_node(cpu));
+		else
+			newinfo->entries[cpu] = vmalloc_node(size,
+							     cpu_to_node(cpu));
+
+		if (newinfo->entries[cpu] == NULL) {
+			free_table_info(newinfo);
+			return NULL;
+		}
+	}
+
+	return newinfo;
+}
+
 static int do_replace(void __user *user, unsigned int len)
 {
 	int ret;
@@ -918,6 +979,7 @@ static int do_replace(void __user *user, unsigned int len)
 	struct arpt_table *t;
 	struct arpt_table_info *newinfo, *oldinfo;
 	struct arpt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -930,13 +992,13 @@ static int do_replace(void __user *user, unsigned int len)
 	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
 		return -ENOMEM;
 
-	newinfo = vmalloc(sizeof(struct arpt_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  (highest_possible_processor_id()+1));
+	newinfo = alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
@@ -947,10 +1009,9 @@ static int do_replace(void __user *user, unsigned int len)
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
@@ -989,8 +1050,10 @@ static int do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+
+	free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
 			 sizeof(struct arpt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
@@ -1002,11 +1065,11 @@ static int do_replace(void __user *user, unsigned int len)
 	module_put(t->me);
 	up(&arpt_mutex);
 free_newinfo_counters_untrans:
-	ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo_counters:
 	vfree(counters);
 free_newinfo:
-	vfree(newinfo);
+	free_table_info(newinfo);
 	return ret;
 }
 
@@ -1030,6 +1093,7 @@ static int do_add_counters(void __user *user, unsigned int len)
 	struct arpt_counters_info tmp, *paddc;
 	struct arpt_table *t;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1059,7 +1123,9 @@ static int do_add_counters(void __user *user, unsigned int len)
 	}
 
 	i = 0;
-	ARPT_ENTRY_ITERATE(t->private->entries,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = t->private->entries[smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry,
 			   t->private->size,
 			   add_counter_to_entry,
 			   paddc->counters,
@@ -1220,30 +1286,32 @@ int arpt_register_table(struct arpt_table *table,
 	struct arpt_table_info *newinfo;
 	static struct arpt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct arpt_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  (highest_possible_processor_id()+1));
+	newinfo = alloc_table_info(repl->size);
 	if (!newinfo) {
 		ret = -ENOMEM;
 		return ret;
 	}
-	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	/* choose the copy on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	duprintf("arpt_register_table: translate table gives %d\n", ret);
 	if (ret != 0) {
-		vfree(newinfo);
+		free_table_info(newinfo);
 		return ret;
 	}
 
 	ret = down_interruptible(&arpt_mutex);
 	if (ret != 0) {
-		vfree(newinfo);
+		free_table_info(newinfo);
 		return ret;
 	}
 
@@ -1272,20 +1340,23 @@ int arpt_register_table(struct arpt_table *table,
 	return ret;
 
 free_unlock:
-	vfree(newinfo);
+	free_table_info(newinfo);
 	goto unlock;
 }
 
 void arpt_unregister_table(struct arpt_table *table)
 {
+	void *loc_cpu_entry;
+
 	down(&arpt_mutex);
 	LIST_DELETE(&arpt_tables, table);
 	up(&arpt_mutex);
 
 	/* Decrease module usage counts and free resources */
-	ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
 			   cleanup_entry, NULL);
-	vfree(table->private);
+	free_table_info(table->private);
}
 
 /* The built-in targets: standard (NULL) and error. */
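The get_counters() rework above is worth isolating: instead of memset()ing the totals to zero and adding in every CPU's counters, the new code seeds the totals from the current CPU's table copy with the new SET_COUNTER() macro, then folds in all remaining CPUs with ADD_COUNTER(), skipping curcpu. Here is a self-contained sketch of that summation; NCPUS and the flat per-CPU counter arrays are stand-ins for the kernel's per-CPU entry blobs and ENTRY_ITERATE walk.

    #define NCPUS 4

    struct counters { unsigned long long bcnt, pcnt; };

    #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while (0)
    #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while (0)

    /* percpu[cpu] points at that CPU's private counter array; total[]
     * does not need to be zeroed first, the SET pass seeds it */
    static void get_counters(struct counters *percpu[NCPUS],
                             struct counters total[],
                             unsigned int nrules, unsigned int curcpu)
    {
            unsigned int i, cpu;

            /* seed from the local copy: replaces memset(total, 0, ...) + add */
            for (i = 0; i < nrules; i++)
                    SET_COUNTER(total[i], percpu[curcpu][i].bcnt,
                                percpu[curcpu][i].pcnt);

            /* fold in every other CPU's view of the same rules */
            for (cpu = 0; cpu < NCPUS; cpu++) {
                    if (cpu == curcpu)
                            continue;
                    for (i = 0; i < nrules; i++)
                            ADD_COUNTER(total[i], percpu[cpu][i].bcnt,
                                        percpu[cpu][i].pcnt);
            }
    }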
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e8..2a26d167e149 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -83,11 +83,6 @@ static DECLARE_MUTEX(ipt_mutex);
    context stops packets coming through and allows user context to read
    the counters or update the rules.
 
-   To be cache friendly on SMP, we arrange them like so:
-   [ n-entries ]
-   ... cache-align padding ...
-   [ n-entries ]
-
    Hence the start of any table is given by get_table() below. */
 
 /* The table itself */
@@ -105,20 +100,15 @@ struct ipt_table_info
 	unsigned int underflow[NF_IP_NUMHOOKS];
 
 	/* ipt_entry tables: one per CPU */
-	char entries[0] ____cacheline_aligned;
+	void *entries[NR_CPUS];
 };
 
 static LIST_HEAD(ipt_target);
 static LIST_HEAD(ipt_match);
 static LIST_HEAD(ipt_tables);
+#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
 
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
 #if 0
 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
@@ -290,8 +280,7 @@ ipt_do_table(struct sk_buff **pskb,
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private, smp_processor_id());
+	table_base = (void *)table->private->entries[smp_processor_id()];
 	e = get_entry(table_base, table->private->hook_entry[hook]);
 
 #ifdef CONFIG_NETFILTER_DEBUG
@@ -563,7 +552,8 @@ unconditional(const struct ipt_ip *ip)
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops. Puts hook bitmask in comefrom. */
 static int
-mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct ipt_table_info *newinfo,
+		   unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -572,7 +562,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ipt_entry *e
-			= (struct ipt_entry *)(newinfo->entries + pos);
+			= (struct ipt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -622,13 +612,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 					goto next;
 
 				e = (struct ipt_entry *)
-					(newinfo->entries + pos);
+					(entry0 + pos);
 			} while (oldpos == pos + e->next_offset);
 
 			/* Move along one */
 			size = e->next_offset;
 			e = (struct ipt_entry *)
-				(newinfo->entries + pos + size);
+				(entry0 + pos + size);
 			e->counters.pcnt = pos;
 			pos += size;
 		} else {
@@ -645,7 +635,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 				newpos = pos + e->next_offset;
 			}
 			e = (struct ipt_entry *)
-				(newinfo->entries + newpos);
+				(entry0 + newpos);
 			e->counters.pcnt = pos;
 			pos = newpos;
 		}
@@ -855,6 +845,7 @@ static int
 translate_table(const char *name,
 		unsigned int valid_hooks,
 		struct ipt_table_info *newinfo,
+		void *entry0,
 		unsigned int size,
 		unsigned int number,
 		const unsigned int *hook_entries,
@@ -875,11 +866,11 @@ translate_table(const char *name,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry_size_and_hooks,
 				newinfo,
-				newinfo->entries,
-				newinfo->entries + size,
+				entry0,
+				entry0 + size,
 				hook_entries, underflows, &i);
 	if (ret != 0)
 		return ret;
@@ -907,27 +898,24 @@ translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks))
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
 		return -ELOOP;
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry, name, size, &i);
 
 	if (ret != 0) {
-		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				  cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
@@ -943,15 +931,12 @@ replace_table(struct ipt_table *table,
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	{
-		struct ipt_entry *table_base;
-		unsigned int i;
+		int cpu;
 
-		for_each_cpu(i) {
-			table_base =
-				(void *)newinfo->entries
-				+ TABLE_OFFSET(newinfo, i);
-
-			table_base->comefrom = 0xdead57ac;
+		for_each_cpu(cpu) {
+			struct ipt_entry *table_base = newinfo->entries[cpu];
+			if (table_base)
+				table_base->comefrom = 0xdead57ac;
 		}
 	}
 #endif
@@ -986,16 +971,44 @@ add_entry_to_counter(const struct ipt_entry *e,
 	return 0;
 }
 
+static inline int
+set_entry_to_counter(const struct ipt_entry *e,
+		     struct ipt_counters total[],
+		     unsigned int *i)
+{
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static void
 get_counters(const struct ipt_table_info *t,
 	     struct ipt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We dont care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[curcpu],
+			  t->size,
+			  set_entry_to_counter,
+			  counters,
+			  &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		IPT_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
@@ -1012,24 +1025,29 @@ copy_entries_to_user(unsigned int total_size,
 	struct ipt_entry *e;
 	struct ipt_counters *counters;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct ipt_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
 	get_counters(table->private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+	/* ... then copy entire thing ... */
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -1041,7 +1059,7 @@ copy_entries_to_user(unsigned int total_size,
 		struct ipt_entry_match *m;
 		struct ipt_entry_target *t;
 
-		e = (struct ipt_entry *)(table->private->entries + off);
+		e = (struct ipt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct ipt_entry, counters),
 				 &counters[num],
@@ -1110,6 +1128,45 @@ get_entries(const struct ipt_get_entries *entries,
 	return ret;
 }
 
+static void free_table_info(struct ipt_table_info *info)
+{
+	int cpu;
+	for_each_cpu(cpu) {
+		if (info->size <= PAGE_SIZE)
+			kfree(info->entries[cpu]);
+		else
+			vfree(info->entries[cpu]);
+	}
+	kfree(info);
+}
+
+static struct ipt_table_info *alloc_table_info(unsigned int size)
+{
+	struct ipt_table_info *newinfo;
+	int cpu;
+
+	newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
+	if (!newinfo)
+		return NULL;
+
+	newinfo->size = size;
+
+	for_each_cpu(cpu) {
+		if (size <= PAGE_SIZE)
+			newinfo->entries[cpu] = kmalloc_node(size,
+							     GFP_KERNEL,
+							     cpu_to_node(cpu));
+		else
+			newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
+		if (newinfo->entries[cpu] == 0) {
+			free_table_info(newinfo);
+			return NULL;
+		}
+	}
+
+	return newinfo;
+}
+
 static int
 do_replace(void __user *user, unsigned int len)
 {
@@ -1118,6 +1175,7 @@ do_replace(void __user *user, unsigned int len)
 	struct ipt_table *t;
 	struct ipt_table_info *newinfo, *oldinfo;
 	struct ipt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1130,13 +1188,13 @@ do_replace(void __user *user, unsigned int len)
 	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
 		return -ENOMEM;
 
-	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  (highest_possible_processor_id()+1));
+	newinfo = alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
@@ -1147,10 +1205,9 @@ do_replace(void __user *user, unsigned int len)
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
@@ -1189,8 +1246,9 @@ do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
 			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
@@ -1202,11 +1260,11 @@ do_replace(void __user *user, unsigned int len)
 	module_put(t->me);
 	up(&ipt_mutex);
 free_newinfo_counters_untrans:
-	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
 free_newinfo_counters:
 	vfree(counters);
 free_newinfo:
-	vfree(newinfo);
+	free_table_info(newinfo);
 	return ret;
 }
 
@@ -1239,6 +1297,7 @@ do_add_counters(void __user *user, unsigned int len)
 	struct ipt_counters_info tmp, *paddc;
 	struct ipt_table *t;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1246,7 +1305,7 @@ do_add_counters(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc_node(len, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
@@ -1268,7 +1327,9 @@ do_add_counters(void __user *user, unsigned int len)
 	}
 
 	i = 0;
-	IPT_ENTRY_ITERATE(t->private->entries,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry,
 			  t->private->size,
 			  add_counter_to_entry,
 			  paddc->counters,
@@ -1460,28 +1521,31 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
 	struct ipt_table_info *newinfo;
 	static struct ipt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  (highest_possible_processor_id()+1));
+	newinfo = alloc_table_info(repl->size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	memcpy(newinfo->entries, repl->entries, repl->size);
+	/* choose the copy on our node/cpu
+	 * but dont care of preemption
+	 */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	if (ret != 0) {
-		vfree(newinfo);
+		free_table_info(newinfo);
 		return ret;
 	}
 
 	ret = down_interruptible(&ipt_mutex);
 	if (ret != 0) {
-		vfree(newinfo);
+		free_table_info(newinfo);
 		return ret;
 	}
 
@@ -1510,20 +1574,23 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
 	return ret;
 
 free_unlock:
-	vfree(newinfo);
+	free_table_info(newinfo);
 	goto unlock;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
 {
+	void *loc_cpu_entry;
+
 	down(&ipt_mutex);
 	LIST_DELETE(&ipt_tables, table);
 	up(&ipt_mutex);
 
 	/* Decrease module usage counts and free resources */
-	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
 			  cleanup_entry, NULL);
-	vfree(table->private);
+	free_table_info(table->private);
 }
 
 /* Returns 1 if the port is matched by the range, 0 otherwise */
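ip6_tables.c below receives the identical conversion. One consequence shared by all three files, visible in copy_entries_to_user() above: every CPU copy holds the same rules but its own counters, so the blob returned to userspace is simply whichever copy belongs to the CPU the calling thread happens to run on (loc_cpu_entry), with the cross-CPU counter sums patched over each entry afterwards. A toy sketch of that two-step copy, using a fixed-size rule struct instead of the kernel's variable-length ipt_entry walked via next_offset:

    #include <string.h>

    struct counters { unsigned long long bcnt, pcnt; };

    /* toy fixed-size rule; the real ipt_entry is variable-length */
    struct rule {
            struct counters counters;
            int verdict;
    };

    /* copy the local CPU's blob wholesale, then patch in the summed
     * counters so userspace never sees one CPU's partial counts */
    static void copy_entries_out(const struct rule *loc_cpu_entry,
                                 const struct counters *summed,
                                 unsigned int nrules, struct rule *out)
    {
            memcpy(out, loc_cpu_entry, nrules * sizeof(*out));
            for (unsigned int i = 0; i < nrules; i++)
                    out[i].counters = summed[i];
    }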
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 95d469271c4d..dd80020d8740 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -86,11 +86,6 @@ static DECLARE_MUTEX(ip6t_mutex); | |||
86 | context stops packets coming through and allows user context to read | 86 | context stops packets coming through and allows user context to read |
87 | the counters or update the rules. | 87 | the counters or update the rules. |
88 | 88 | ||
89 | To be cache friendly on SMP, we arrange them like so: | ||
90 | [ n-entries ] | ||
91 | ... cache-align padding ... | ||
92 | [ n-entries ] | ||
93 | |||
94 | Hence the start of any table is given by get_table() below. */ | 89 | Hence the start of any table is given by get_table() below. */ |
95 | 90 | ||
96 | /* The table itself */ | 91 | /* The table itself */ |
@@ -108,20 +103,15 @@ struct ip6t_table_info | |||
108 | unsigned int underflow[NF_IP6_NUMHOOKS]; | 103 | unsigned int underflow[NF_IP6_NUMHOOKS]; |
109 | 104 | ||
110 | /* ip6t_entry tables: one per CPU */ | 105 | /* ip6t_entry tables: one per CPU */ |
111 | char entries[0] ____cacheline_aligned; | 106 | void *entries[NR_CPUS]; |
112 | }; | 107 | }; |
113 | 108 | ||
114 | static LIST_HEAD(ip6t_target); | 109 | static LIST_HEAD(ip6t_target); |
115 | static LIST_HEAD(ip6t_match); | 110 | static LIST_HEAD(ip6t_match); |
116 | static LIST_HEAD(ip6t_tables); | 111 | static LIST_HEAD(ip6t_tables); |
112 | #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) | ||
117 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) | 113 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) |
118 | 114 | ||
119 | #ifdef CONFIG_SMP | ||
120 | #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) | ||
121 | #else | ||
122 | #define TABLE_OFFSET(t,p) 0 | ||
123 | #endif | ||
124 | |||
125 | #if 0 | 115 | #if 0 |
126 | #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) | 116 | #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) |
127 | #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) | 117 | #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) |
@@ -376,8 +366,7 @@ ip6t_do_table(struct sk_buff **pskb, | |||
376 | 366 | ||
377 | read_lock_bh(&table->lock); | 367 | read_lock_bh(&table->lock); |
378 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 368 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
379 | table_base = (void *)table->private->entries | 369 | table_base = (void *)table->private->entries[smp_processor_id()]; |
380 | + TABLE_OFFSET(table->private, smp_processor_id()); | ||
381 | e = get_entry(table_base, table->private->hook_entry[hook]); | 370 | e = get_entry(table_base, table->private->hook_entry[hook]); |
382 | 371 | ||
383 | #ifdef CONFIG_NETFILTER_DEBUG | 372 | #ifdef CONFIG_NETFILTER_DEBUG |
@@ -649,7 +638,8 @@ unconditional(const struct ip6t_ip6 *ipv6) | |||
649 | /* Figures out from what hook each rule can be called: returns 0 if | 638 | /* Figures out from what hook each rule can be called: returns 0 if |
650 | there are loops. Puts hook bitmask in comefrom. */ | 639 | there are loops. Puts hook bitmask in comefrom. */ |
651 | static int | 640 | static int |
652 | mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | 641 | mark_source_chains(struct ip6t_table_info *newinfo, |
642 | unsigned int valid_hooks, void *entry0) | ||
653 | { | 643 | { |
654 | unsigned int hook; | 644 | unsigned int hook; |
655 | 645 | ||
@@ -658,7 +648,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | |||
658 | for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { | 648 | for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { |
659 | unsigned int pos = newinfo->hook_entry[hook]; | 649 | unsigned int pos = newinfo->hook_entry[hook]; |
660 | struct ip6t_entry *e | 650 | struct ip6t_entry *e |
661 | = (struct ip6t_entry *)(newinfo->entries + pos); | 651 | = (struct ip6t_entry *)(entry0 + pos); |
662 | 652 | ||
663 | if (!(valid_hooks & (1 << hook))) | 653 | if (!(valid_hooks & (1 << hook))) |
664 | continue; | 654 | continue; |
@@ -708,13 +698,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | |||
708 | goto next; | 698 | goto next; |
709 | 699 | ||
710 | e = (struct ip6t_entry *) | 700 | e = (struct ip6t_entry *) |
711 | (newinfo->entries + pos); | 701 | (entry0 + pos); |
712 | } while (oldpos == pos + e->next_offset); | 702 | } while (oldpos == pos + e->next_offset); |
713 | 703 | ||
714 | /* Move along one */ | 704 | /* Move along one */ |
715 | size = e->next_offset; | 705 | size = e->next_offset; |
716 | e = (struct ip6t_entry *) | 706 | e = (struct ip6t_entry *) |
717 | (newinfo->entries + pos + size); | 707 | (entry0 + pos + size); |
718 | e->counters.pcnt = pos; | 708 | e->counters.pcnt = pos; |
719 | pos += size; | 709 | pos += size; |
720 | } else { | 710 | } else { |
@@ -731,7 +721,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | |||
731 | newpos = pos + e->next_offset; | 721 | newpos = pos + e->next_offset; |
732 | } | 722 | } |
733 | e = (struct ip6t_entry *) | 723 | e = (struct ip6t_entry *) |
734 | (newinfo->entries + newpos); | 724 | (entry0 + newpos); |
735 | e->counters.pcnt = pos; | 725 | e->counters.pcnt = pos; |
736 | pos = newpos; | 726 | pos = newpos; |
737 | } | 727 | } |
@@ -941,6 +931,7 @@ static int | |||
941 | translate_table(const char *name, | 931 | translate_table(const char *name, |
942 | unsigned int valid_hooks, | 932 | unsigned int valid_hooks, |
943 | struct ip6t_table_info *newinfo, | 933 | struct ip6t_table_info *newinfo, |
934 | void *entry0, | ||
944 | unsigned int size, | 935 | unsigned int size, |
945 | unsigned int number, | 936 | unsigned int number, |
946 | const unsigned int *hook_entries, | 937 | const unsigned int *hook_entries, |
@@ -961,11 +952,11 @@ translate_table(const char *name, | |||
961 | duprintf("translate_table: size %u\n", newinfo->size); | 952 | duprintf("translate_table: size %u\n", newinfo->size); |
962 | i = 0; | 953 | i = 0; |
963 | /* Walk through entries, checking offsets. */ | 954 | /* Walk through entries, checking offsets. */ |
964 | ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 955 | ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, |
965 | check_entry_size_and_hooks, | 956 | check_entry_size_and_hooks, |
966 | newinfo, | 957 | newinfo, |
967 | newinfo->entries, | 958 | entry0, |
968 | newinfo->entries + size, | 959 | entry0 + size, |
969 | hook_entries, underflows, &i); | 960 | hook_entries, underflows, &i); |
970 | if (ret != 0) | 961 | if (ret != 0) |
971 | return ret; | 962 | return ret; |
@@ -993,27 +984,24 @@ translate_table(const char *name, | |||
993 | } | 984 | } |
994 | } | 985 | } |
995 | 986 | ||
996 | if (!mark_source_chains(newinfo, valid_hooks)) | 987 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) |
997 | return -ELOOP; | 988 | return -ELOOP; |
998 | 989 | ||
999 | /* Finally, each sanity check must pass */ | 990 | /* Finally, each sanity check must pass */ |
1000 | i = 0; | 991 | i = 0; |
1001 | ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 992 | ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, |
1002 | check_entry, name, size, &i); | 993 | check_entry, name, size, &i); |
1003 | 994 | ||
1004 | if (ret != 0) { | 995 | if (ret != 0) { |
1005 | IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 996 | IP6T_ENTRY_ITERATE(entry0, newinfo->size, |
1006 | cleanup_entry, &i); | 997 | cleanup_entry, &i); |
1007 | return ret; | 998 | return ret; |
1008 | } | 999 | } |
1009 | 1000 | ||
1010 | /* And one copy for every other CPU */ | 1001 | /* And one copy for every other CPU */ |
1011 | for_each_cpu(i) { | 1002 | for_each_cpu(i) { |
1012 | if (i == 0) | 1003 | if (newinfo->entries[i] && newinfo->entries[i] != entry0) |
1013 | continue; | 1004 | memcpy(newinfo->entries[i], entry0, newinfo->size); |
1014 | memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, | ||
1015 | newinfo->entries, | ||
1016 | SMP_ALIGN(newinfo->size)); | ||
1017 | } | 1005 | } |
1018 | 1006 | ||
1019 | return ret; | 1007 | return ret; |
@@ -1029,15 +1017,12 @@ replace_table(struct ip6t_table *table, | |||
1029 | 1017 | ||
1030 | #ifdef CONFIG_NETFILTER_DEBUG | 1018 | #ifdef CONFIG_NETFILTER_DEBUG |
1031 | { | 1019 | { |
1032 | struct ip6t_entry *table_base; | 1020 | int cpu; |
1033 | unsigned int i; | ||
1034 | 1021 | ||
1035 | for_each_cpu(i) { | 1022 | for_each_cpu(cpu) { |
1036 | table_base = | 1023 | struct ip6t_entry *table_base = newinfo->entries[cpu]; |
1037 | (void *)newinfo->entries | 1024 | if (table_base) |
1038 | + TABLE_OFFSET(newinfo, i); | 1025 | table_base->comefrom = 0xdead57ac; |
1039 | |||
1040 | table_base->comefrom = 0xdead57ac; | ||
1041 | } | 1026 | } |
1042 | } | 1027 | } |
1043 | #endif | 1028 | #endif |
@@ -1072,16 +1057,44 @@ add_entry_to_counter(const struct ip6t_entry *e, | |||
1072 | return 0; | 1057 | return 0; |
1073 | } | 1058 | } |
1074 | 1059 | ||
1060 | static inline int | ||
1061 | set_entry_to_counter(const struct ip6t_entry *e, | ||
1062 | struct ip6t_counters total[], | ||
1063 | unsigned int *i) | ||
1064 | { | ||
1065 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
1066 | |||
1067 | (*i)++; | ||
1068 | return 0; | ||
1069 | } | ||
1070 | |||
1075 | static void | 1071 | static void |
1076 | get_counters(const struct ip6t_table_info *t, | 1072 | get_counters(const struct ip6t_table_info *t, |
1077 | struct ip6t_counters counters[]) | 1073 | struct ip6t_counters counters[]) |
1078 | { | 1074 | { |
1079 | unsigned int cpu; | 1075 | unsigned int cpu; |
1080 | unsigned int i; | 1076 | unsigned int i; |
1077 | unsigned int curcpu; | ||
1078 | |||
1079 | /* Instead of clearing (by a previous call to memset()) | ||
1080 | * the counters and using adds, we set the counters | ||
1081 | * with data used by 'current' CPU | ||
1082 | * We dont care about preemption here. | ||
1083 | */ | ||
1084 | curcpu = raw_smp_processor_id(); | ||
1085 | |||
1086 | i = 0; | ||
1087 | IP6T_ENTRY_ITERATE(t->entries[curcpu], | ||
1088 | t->size, | ||
1089 | set_entry_to_counter, | ||
1090 | counters, | ||
1091 | &i); | ||
1081 | 1092 | ||
1082 | for_each_cpu(cpu) { | 1093 | for_each_cpu(cpu) { |
1094 | if (cpu == curcpu) | ||
1095 | continue; | ||
1083 | i = 0; | 1096 | i = 0; |
1084 | IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), | 1097 | IP6T_ENTRY_ITERATE(t->entries[cpu], |
1085 | t->size, | 1098 | t->size, |
1086 | add_entry_to_counter, | 1099 | add_entry_to_counter, |
1087 | counters, | 1100 | counters, |
@@ -1098,6 +1111,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1098 | struct ip6t_entry *e; | 1111 | struct ip6t_entry *e; |
1099 | struct ip6t_counters *counters; | 1112 | struct ip6t_counters *counters; |
1100 | int ret = 0; | 1113 | int ret = 0; |
1114 | void *loc_cpu_entry; | ||
1101 | 1115 | ||
1102 | /* We need atomic snapshot of counters: rest doesn't change | 1116 | /* We need atomic snapshot of counters: rest doesn't change |
1103 | (other than comefrom, which userspace doesn't care | 1117 | (other than comefrom, which userspace doesn't care |
@@ -1109,13 +1123,13 @@ copy_entries_to_user(unsigned int total_size, | |||
1109 | return -ENOMEM; | 1123 | return -ENOMEM; |
1110 | 1124 | ||
1111 | /* First, sum counters... */ | 1125 | /* First, sum counters... */ |
1112 | memset(counters, 0, countersize); | ||
1113 | write_lock_bh(&table->lock); | 1126 | write_lock_bh(&table->lock); |
1114 | get_counters(table->private, counters); | 1127 | get_counters(table->private, counters); |
1115 | write_unlock_bh(&table->lock); | 1128 | write_unlock_bh(&table->lock); |
1116 | 1129 | ||
1117 | /* ... then copy entire thing from CPU 0... */ | 1130 | /* choose the copy that is on our node/cpu */ |
1118 | if (copy_to_user(userptr, table->private->entries, total_size) != 0) { | 1131 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
1132 | if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { | ||
1119 | ret = -EFAULT; | 1133 | ret = -EFAULT; |
1120 | goto free_counters; | 1134 | goto free_counters; |
1121 | } | 1135 | } |
@@ -1127,7 +1141,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1127 | struct ip6t_entry_match *m; | 1141 | struct ip6t_entry_match *m; |
1128 | struct ip6t_entry_target *t; | 1142 | struct ip6t_entry_target *t; |
1129 | 1143 | ||
1130 | e = (struct ip6t_entry *)(table->private->entries + off); | 1144 | e = (struct ip6t_entry *)(loc_cpu_entry + off); |
1131 | if (copy_to_user(userptr + off | 1145 | if (copy_to_user(userptr + off |
1132 | + offsetof(struct ip6t_entry, counters), | 1146 | + offsetof(struct ip6t_entry, counters), |
1133 | &counters[num], | 1147 | &counters[num], |
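
copy_entries_to_user() now copies the local CPU's ruleset image out to userspace and then overwrites each entry's counter field, at its offset within the blob, with the totals summed across all copies, since no single copy's embedded counters are authoritative anymore. A sketch of that fixup pass; struct entry and patch_counters are stand-ins for the real ip6t_entry walk, and a well-formed image (next_offset always > 0) is assumed:

#include <stddef.h>
#include <string.h>

struct counter { unsigned long long bcnt, pcnt; };

struct entry {
	unsigned int next_offset;	/* distance to the next entry */
	struct counter counters;
};

static void patch_counters(char *user_image, size_t total_size,
                           const struct counter *totals)
{
	size_t off = 0;
	unsigned int num = 0;

	while (off < total_size) {
		struct entry *e = (struct entry *)(user_image + off);

		/* replace the stale in-image counters with the sums */
		memcpy((char *)e + offsetof(struct entry, counters),
		       &totals[num], sizeof(struct counter));
		off += e->next_offset;
		num++;
	}
}
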
@@ -1196,6 +1210,46 @@ get_entries(const struct ip6t_get_entries *entries, | |||
1196 | return ret; | 1210 | return ret; |
1197 | } | 1211 | } |
1198 | 1212 | ||
1213 | static void free_table_info(struct ip6t_table_info *info) | ||
1214 | { | ||
1215 | int cpu; | ||
1216 | for_each_cpu(cpu) { | ||
1217 | if (info->size <= PAGE_SIZE) | ||
1218 | kfree(info->entries[cpu]); | ||
1219 | else | ||
1220 | vfree(info->entries[cpu]); | ||
1221 | } | ||
1222 | kfree(info); | ||
1223 | } | ||
1224 | |||
1225 | static struct ip6t_table_info *alloc_table_info(unsigned int size) | ||
1226 | { | ||
1227 | struct ip6t_table_info *newinfo; | ||
1228 | int cpu; | ||
1229 | |||
1230 | newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL); | ||
1231 | if (!newinfo) | ||
1232 | return NULL; | ||
1233 | |||
1234 | newinfo->size = size; | ||
1235 | |||
1236 | for_each_cpu(cpu) { | ||
1237 | if (size <= PAGE_SIZE) | ||
1238 | newinfo->entries[cpu] = kmalloc_node(size, | ||
1239 | GFP_KERNEL, | ||
1240 | cpu_to_node(cpu)); | ||
1241 | else | ||
1242 | newinfo->entries[cpu] = vmalloc_node(size, | ||
1243 | cpu_to_node(cpu)); | ||
1244 | if (newinfo->entries[cpu] == NULL) { | ||
1245 | free_table_info(newinfo); | ||
1246 | return NULL; | ||
1247 | } | ||
1248 | } | ||
1249 | |||
1250 | return newinfo; | ||
1251 | } | ||
1252 | |||
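
alloc_table_info()/free_table_info() above centralize the new per-CPU, NUMA-aware allocation: each copy is kmalloc_node()'d on the owning CPU's node when it fits in a page and vmalloc_node()'d otherwise, and the free path re-derives the same choice from info->size, which is why the size is stored in the structure. A userspace analogue, with malloc()/free() standing in for both allocator classes and a fixed NR_CPUS assumed:

#include <stdlib.h>

#define NR_CPUS 4

struct table_info {
	unsigned int size;
	void *entries[NR_CPUS];
};

static void free_table_info(struct table_info *info)
{
	int cpu;

	/* in the kernel: kfree() or vfree() chosen by info->size */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		free(info->entries[cpu]);	/* free(NULL) is a no-op */
	free(info);
}

static struct table_info *alloc_table_info(unsigned int size)
{
	struct table_info *info = calloc(1, sizeof(*info));
	int cpu;

	if (!info)
		return NULL;
	info->size = size;
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		info->entries[cpu] = malloc(size);
		if (!info->entries[cpu]) {
			/* unallocated slots are NULL, so this is safe */
			free_table_info(info);
			return NULL;
		}
	}
	return info;
}
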
1199 | static int | 1253 | static int |
1200 | do_replace(void __user *user, unsigned int len) | 1254 | do_replace(void __user *user, unsigned int len) |
1201 | { | 1255 | { |
@@ -1204,6 +1258,7 @@ do_replace(void __user *user, unsigned int len) | |||
1204 | struct ip6t_table *t; | 1258 | struct ip6t_table *t; |
1205 | struct ip6t_table_info *newinfo, *oldinfo; | 1259 | struct ip6t_table_info *newinfo, *oldinfo; |
1206 | struct ip6t_counters *counters; | 1260 | struct ip6t_counters *counters; |
1261 | void *loc_cpu_entry, *loc_cpu_old_entry; | ||
1207 | 1262 | ||
1208 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1263 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1209 | return -EFAULT; | 1264 | return -EFAULT; |
@@ -1212,13 +1267,13 @@ do_replace(void __user *user, unsigned int len) | |||
1212 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) | 1267 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) |
1213 | return -ENOMEM; | 1268 | return -ENOMEM; |
1214 | 1269 | ||
1215 | newinfo = vmalloc(sizeof(struct ip6t_table_info) | 1270 | newinfo = alloc_table_info(tmp.size); |
1216 | + SMP_ALIGN(tmp.size) * | ||
1217 | (highest_possible_processor_id()+1)); | ||
1218 | if (!newinfo) | 1271 | if (!newinfo) |
1219 | return -ENOMEM; | 1272 | return -ENOMEM; |
1220 | 1273 | ||
1221 | if (copy_from_user(newinfo->entries, user + sizeof(tmp), | 1274 | /* choose the copy that is on our node/cpu */ |
1275 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1276 | if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), | ||
1222 | tmp.size) != 0) { | 1277 | tmp.size) != 0) { |
1223 | ret = -EFAULT; | 1278 | ret = -EFAULT; |
1224 | goto free_newinfo; | 1279 | goto free_newinfo; |
@@ -1229,10 +1284,9 @@ do_replace(void __user *user, unsigned int len) | |||
1229 | ret = -ENOMEM; | 1284 | ret = -ENOMEM; |
1230 | goto free_newinfo; | 1285 | goto free_newinfo; |
1231 | } | 1286 | } |
1232 | memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters)); | ||
1233 | 1287 | ||
1234 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1288 | ret = translate_table(tmp.name, tmp.valid_hooks, |
1235 | newinfo, tmp.size, tmp.num_entries, | 1289 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, |
1236 | tmp.hook_entry, tmp.underflow); | 1290 | tmp.hook_entry, tmp.underflow); |
1237 | if (ret != 0) | 1291 | if (ret != 0) |
1238 | goto free_newinfo_counters; | 1292 | goto free_newinfo_counters; |
@@ -1271,8 +1325,9 @@ do_replace(void __user *user, unsigned int len) | |||
1271 | /* Get the old counters. */ | 1325 | /* Get the old counters. */ |
1272 | get_counters(oldinfo, counters); | 1326 | get_counters(oldinfo, counters); |
1273 | /* Decrease module usage counts and free resource */ | 1327 | /* Decrease module usage counts and free resource */ |
1274 | IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); | 1328 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1275 | vfree(oldinfo); | 1329 | IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); |
1330 | free_table_info(oldinfo); | ||
1276 | if (copy_to_user(tmp.counters, counters, | 1331 | if (copy_to_user(tmp.counters, counters, |
1277 | sizeof(struct ip6t_counters) * tmp.num_counters) != 0) | 1332 | sizeof(struct ip6t_counters) * tmp.num_counters) != 0) |
1278 | ret = -EFAULT; | 1333 | ret = -EFAULT; |
@@ -1284,11 +1339,11 @@ do_replace(void __user *user, unsigned int len) | |||
1284 | module_put(t->me); | 1339 | module_put(t->me); |
1285 | up(&ip6t_mutex); | 1340 | up(&ip6t_mutex); |
1286 | free_newinfo_counters_untrans: | 1341 | free_newinfo_counters_untrans: |
1287 | IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); | 1342 | IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); |
1288 | free_newinfo_counters: | 1343 | free_newinfo_counters: |
1289 | vfree(counters); | 1344 | vfree(counters); |
1290 | free_newinfo: | 1345 | free_newinfo: |
1291 | vfree(newinfo); | 1346 | free_table_info(newinfo); |
1292 | return ret; | 1347 | return ret; |
1293 | } | 1348 | } |
1294 | 1349 | ||
@@ -1321,6 +1376,7 @@ do_add_counters(void __user *user, unsigned int len) | |||
1321 | struct ip6t_counters_info tmp, *paddc; | 1376 | struct ip6t_counters_info tmp, *paddc; |
1322 | struct ip6t_table *t; | 1377 | struct ip6t_table *t; |
1323 | int ret = 0; | 1378 | int ret = 0; |
1379 | void *loc_cpu_entry; | ||
1324 | 1380 | ||
1325 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1381 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1326 | return -EFAULT; | 1382 | return -EFAULT; |
@@ -1350,7 +1406,9 @@ do_add_counters(void __user *user, unsigned int len) | |||
1350 | } | 1406 | } |
1351 | 1407 | ||
1352 | i = 0; | 1408 | i = 0; |
1353 | IP6T_ENTRY_ITERATE(t->private->entries, | 1409 | /* Choose the copy that is on our node */ |
1410 | loc_cpu_entry = t->private->entries[smp_processor_id()]; | ||
1411 | IP6T_ENTRY_ITERATE(loc_cpu_entry, | ||
1354 | t->private->size, | 1412 | t->private->size, |
1355 | add_counter_to_entry, | 1413 | add_counter_to_entry, |
1356 | paddc->counters, | 1414 | paddc->counters, |
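
do_add_counters() applies the userspace-supplied deltas to the local CPU's copy only. That is sufficient because the totals reported back through get_counters() are always the sum over all per-CPU copies, so an increment landing in any one copy shows up in every subsequent readout. A small model of that invariant, reusing the illustrative struct counter and fixed sizes from the earlier sketches:

#define NR_CPUS   4
#define NENTRIES  2

struct counter { unsigned long long bcnt, pcnt; };

static struct counter per_cpu[NR_CPUS][NENTRIES];

/* apply user-supplied deltas to one (the local) copy only */
static void add_counters(int curcpu, const struct counter *delta)
{
	int i;

	for (i = 0; i < NENTRIES; i++) {
		per_cpu[curcpu][i].bcnt += delta[i].bcnt;
		per_cpu[curcpu][i].pcnt += delta[i].pcnt;
	}
}
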
@@ -1543,28 +1601,29 @@ int ip6t_register_table(struct ip6t_table *table, | |||
1543 | struct ip6t_table_info *newinfo; | 1601 | struct ip6t_table_info *newinfo; |
1544 | static struct ip6t_table_info bootstrap | 1602 | static struct ip6t_table_info bootstrap |
1545 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | 1603 | = { 0, 0, 0, { 0 }, { 0 }, { } }; |
1604 | void *loc_cpu_entry; | ||
1546 | 1605 | ||
1547 | newinfo = vmalloc(sizeof(struct ip6t_table_info) | 1606 | newinfo = alloc_table_info(repl->size); |
1548 | + SMP_ALIGN(repl->size) * | ||
1549 | (highest_possible_processor_id()+1)); | ||
1550 | if (!newinfo) | 1607 | if (!newinfo) |
1551 | return -ENOMEM; | 1608 | return -ENOMEM; |
1552 | 1609 | ||
1553 | memcpy(newinfo->entries, repl->entries, repl->size); | 1610 | /* choose the copy on our node/cpu */ |
1611 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1612 | memcpy(loc_cpu_entry, repl->entries, repl->size); | ||
1554 | 1613 | ||
1555 | ret = translate_table(table->name, table->valid_hooks, | 1614 | ret = translate_table(table->name, table->valid_hooks, |
1556 | newinfo, repl->size, | 1615 | newinfo, loc_cpu_entry, repl->size, |
1557 | repl->num_entries, | 1616 | repl->num_entries, |
1558 | repl->hook_entry, | 1617 | repl->hook_entry, |
1559 | repl->underflow); | 1618 | repl->underflow); |
1560 | if (ret != 0) { | 1619 | if (ret != 0) { |
1561 | vfree(newinfo); | 1620 | free_table_info(newinfo); |
1562 | return ret; | 1621 | return ret; |
1563 | } | 1622 | } |
1564 | 1623 | ||
1565 | ret = down_interruptible(&ip6t_mutex); | 1624 | ret = down_interruptible(&ip6t_mutex); |
1566 | if (ret != 0) { | 1625 | if (ret != 0) { |
1567 | vfree(newinfo); | 1626 | free_table_info(newinfo); |
1568 | return ret; | 1627 | return ret; |
1569 | } | 1628 | } |
1570 | 1629 | ||
@@ -1593,20 +1652,23 @@ int ip6t_register_table(struct ip6t_table *table, | |||
1593 | return ret; | 1652 | return ret; |
1594 | 1653 | ||
1595 | free_unlock: | 1654 | free_unlock: |
1596 | vfree(newinfo); | 1655 | free_table_info(newinfo); |
1597 | goto unlock; | 1656 | goto unlock; |
1598 | } | 1657 | } |
1599 | 1658 | ||
1600 | void ip6t_unregister_table(struct ip6t_table *table) | 1659 | void ip6t_unregister_table(struct ip6t_table *table) |
1601 | { | 1660 | { |
1661 | void *loc_cpu_entry; | ||
1662 | |||
1602 | down(&ip6t_mutex); | 1663 | down(&ip6t_mutex); |
1603 | LIST_DELETE(&ip6t_tables, table); | 1664 | LIST_DELETE(&ip6t_tables, table); |
1604 | up(&ip6t_mutex); | 1665 | up(&ip6t_mutex); |
1605 | 1666 | ||
1606 | /* Decrease module usage counts and free resources */ | 1667 | /* Decrease module usage counts and free resources */ |
1607 | IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, | 1668 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
1669 | IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size, | ||
1608 | cleanup_entry, NULL); | 1670 | cleanup_entry, NULL); |
1609 | vfree(table->private); | 1671 | free_table_info(table->private); |
1610 | } | 1672 | } |
1611 | 1673 | ||
1612 | /* Returns 1 if the port is matched by the range, 0 otherwise */ | 1674 | /* Returns 1 if the port is matched by the range, 0 otherwise */ |
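
In ip6t_unregister_table() the destructors (cleanup_entry(), which drops match/target module references) run once, over a single local copy, while free_table_info() releases the backing memory of every copy; running cleanup over each copy would drop every module reference once per CPU. A sketch of that teardown split, with illustrative types and callback rather than the kernel's own:

#include <stdlib.h>

#define NR_CPUS 4

struct table_info {
	unsigned int size;
	void *entries[NR_CPUS];
};

typedef void (*cleanup_fn)(void *copy);

static void unregister_table(struct table_info *info,
                             cleanup_fn cleanup, int curcpu)
{
	int cpu;

	/* one pass of destructors, over the local copy only */
	cleanup(info->entries[curcpu]);

	/* every per-CPU duplicate still owns its own memory */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		free(info->entries[cpu]);
	free(info);
}
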