about summary refs log tree commit diff stats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/addrconf.c 457
-rw-r--r-- net/ipv6/af_inet6.c 8
-rw-r--r-- net/ipv6/icmp.c 30
-rw-r--r-- net/ipv6/ip6_fib.c 54
-rw-r--r-- net/ipv6/ip6_input.c 5
-rw-r--r-- net/ipv6/ip6_output.c 92
-rw-r--r-- net/ipv6/ip6_tunnel.c 7
-rw-r--r-- net/ipv6/ipcomp6.c 3
-rw-r--r-- net/ipv6/ipv6_sockglue.c 5
-rw-r--r-- net/ipv6/ipv6_syms.c 2
-rw-r--r-- net/ipv6/mcast.c 25
-rw-r--r-- net/ipv6/netfilter/Kconfig 36
-rw-r--r-- net/ipv6/netfilter/Makefile 6
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 298
-rw-r--r-- net/ipv6/netfilter/ip6t_MARK.c 8
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 556
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 272
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 897
-rw-r--r-- net/ipv6/proc.c 4
-rw-r--r-- net/ipv6/raw.c 46
-rw-r--r-- net/ipv6/reassembly.c 41
-rw-r--r-- net/ipv6/route.c 10
-rw-r--r-- net/ipv6/tcp_ipv6.c 35
-rw-r--r-- net/ipv6/udp.c 25
24 files changed, 2430 insertions, 492 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a970b4727ce8..a16064ba0caf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -35,6 +35,9 @@
35 * YOSHIFUJI Hideaki @USAGI : ARCnet support 35 * YOSHIFUJI Hideaki @USAGI : ARCnet support
36 * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to 36 * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
37 * seq_file. 37 * seq_file.
38 * YOSHIFUJI Hideaki @USAGI : improved source address
39 * selection; consider scope,
40 * status etc.
38 */ 41 */
39 42
40#include <linux/config.h> 43#include <linux/config.h>
@@ -75,7 +78,7 @@
75#ifdef CONFIG_IPV6_PRIVACY 78#ifdef CONFIG_IPV6_PRIVACY
76#include <linux/random.h> 79#include <linux/random.h>
77#include <linux/crypto.h> 80#include <linux/crypto.h>
78#include <asm/scatterlist.h> 81#include <linux/scatterlist.h>
79#endif 82#endif
80 83
81#include <asm/uaccess.h> 84#include <asm/uaccess.h>
@@ -193,46 +196,51 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
193#endif 196#endif
194const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; 197const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
195 198
196int ipv6_addr_type(const struct in6_addr *addr) 199#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
200
201static inline unsigned ipv6_addr_scope2type(unsigned scope)
202{
203 switch(scope) {
204 case IPV6_ADDR_SCOPE_NODELOCAL:
205 return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) |
206 IPV6_ADDR_LOOPBACK);
207 case IPV6_ADDR_SCOPE_LINKLOCAL:
208 return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) |
209 IPV6_ADDR_LINKLOCAL);
210 case IPV6_ADDR_SCOPE_SITELOCAL:
211 return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) |
212 IPV6_ADDR_SITELOCAL);
213 }
214 return IPV6_ADDR_SCOPE_TYPE(scope);
215}
216
217int __ipv6_addr_type(const struct in6_addr *addr)
197{ 218{
198 int type;
199 u32 st; 219 u32 st;
200 220
201 st = addr->s6_addr32[0]; 221 st = addr->s6_addr32[0];
202 222
203 if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
204 type = IPV6_ADDR_MULTICAST;
205
206 switch((st & htonl(0x00FF0000))) {
207 case __constant_htonl(0x00010000):
208 type |= IPV6_ADDR_LOOPBACK;
209 break;
210
211 case __constant_htonl(0x00020000):
212 type |= IPV6_ADDR_LINKLOCAL;
213 break;
214
215 case __constant_htonl(0x00050000):
216 type |= IPV6_ADDR_SITELOCAL;
217 break;
218 };
219 return type;
220 }
221
222 type = IPV6_ADDR_UNICAST;
223
224 /* Consider all addresses with the first three bits different of 223 /* Consider all addresses with the first three bits different of
225 000 and 111 as finished. 224 000 and 111 as unicasts.
226 */ 225 */
227 if ((st & htonl(0xE0000000)) != htonl(0x00000000) && 226 if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
228 (st & htonl(0xE0000000)) != htonl(0xE0000000)) 227 (st & htonl(0xE0000000)) != htonl(0xE0000000))
229 return type; 228 return (IPV6_ADDR_UNICAST |
230 229 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));
231 if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
232 return (IPV6_ADDR_LINKLOCAL | type);
233 230
231 if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
232 /* multicast */
233 /* addr-select 3.1 */
234 return (IPV6_ADDR_MULTICAST |
235 ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr)));
236 }
237
238 if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
239 return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST |
240 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */
234 if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) 241 if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
235 return (IPV6_ADDR_SITELOCAL | type); 242 return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST |
243 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */
236 244
237 if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { 245 if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
238 if (addr->s6_addr32[2] == 0) { 246 if (addr->s6_addr32[2] == 0) {
@@ -240,24 +248,20 @@ int ipv6_addr_type(const struct in6_addr *addr)
240 return IPV6_ADDR_ANY; 248 return IPV6_ADDR_ANY;
241 249
242 if (addr->s6_addr32[3] == htonl(0x00000001)) 250 if (addr->s6_addr32[3] == htonl(0x00000001))
243 return (IPV6_ADDR_LOOPBACK | type); 251 return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST |
252 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */
244 253
245 return (IPV6_ADDR_COMPATv4 | type); 254 return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST |
255 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */
246 } 256 }
247 257
248 if (addr->s6_addr32[2] == htonl(0x0000ffff)) 258 if (addr->s6_addr32[2] == htonl(0x0000ffff))
249 return IPV6_ADDR_MAPPED; 259 return (IPV6_ADDR_MAPPED |
260 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */
250 } 261 }
251 262
252 st &= htonl(0xFF000000); 263 return (IPV6_ADDR_RESERVED |
253 if (st == 0) 264 IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */
254 return IPV6_ADDR_RESERVED;
255 st &= htonl(0xFE000000);
256 if (st == htonl(0x02000000))
257 return IPV6_ADDR_RESERVED; /* for NSAP */
258 if (st == htonl(0x04000000))
259 return IPV6_ADDR_RESERVED; /* for IPX */
260 return type;
261} 265}
262 266
263static void addrconf_del_timer(struct inet6_ifaddr *ifp) 267static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -805,138 +809,276 @@ out:
805#endif 809#endif
806 810
807/* 811/*
808 * Choose an appropriate source address 812 * Choose an appropriate source address (RFC3484)
809 * should do:
810 * i) get an address with an appropriate scope
811 * ii) see if there is a specific route for the destination and use
812 * an address of the attached interface
813 * iii) don't use deprecated addresses
814 */ 813 */
815static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref) 814struct ipv6_saddr_score {
815 int addr_type;
816 unsigned int attrs;
817 int matchlen;
818 unsigned int scope;
819 unsigned int rule;
820};
821
822#define IPV6_SADDR_SCORE_LOCAL 0x0001
823#define IPV6_SADDR_SCORE_PREFERRED 0x0004
824#define IPV6_SADDR_SCORE_HOA 0x0008
825#define IPV6_SADDR_SCORE_OIF 0x0010
826#define IPV6_SADDR_SCORE_LABEL 0x0020
827#define IPV6_SADDR_SCORE_PRIVACY 0x0040
828
829static int inline ipv6_saddr_preferred(int type)
816{ 830{
817 int pref; 831 if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|
818 pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2; 832 IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED))
819#ifdef CONFIG_IPV6_PRIVACY 833 return 1;
820 pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1; 834 return 0;
821#endif
822 return pref;
823} 835}
824 836
825#ifdef CONFIG_IPV6_PRIVACY 837/* static matching label */
826#define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3) 838static int inline ipv6_saddr_label(const struct in6_addr *addr, int type)
827#else 839{
828#define IPV6_GET_SADDR_MAXSCORE(score) (score) 840 /*
829#endif 841 * prefix (longest match) label
842 * -----------------------------
843 * ::1/128 0
844 * ::/0 1
845 * 2002::/16 2
846 * ::/96 3
847 * ::ffff:0:0/96 4
848 */
849 if (type & IPV6_ADDR_LOOPBACK)
850 return 0;
851 else if (type & IPV6_ADDR_COMPATv4)
852 return 3;
853 else if (type & IPV6_ADDR_MAPPED)
854 return 4;
855 else if (addr->s6_addr16[0] == htons(0x2002))
856 return 2;
857 return 1;
858}
830 859
831int ipv6_dev_get_saddr(struct net_device *dev, 860int ipv6_dev_get_saddr(struct net_device *daddr_dev,
832 struct in6_addr *daddr, struct in6_addr *saddr) 861 struct in6_addr *daddr, struct in6_addr *saddr)
833{ 862{
834 struct inet6_ifaddr *ifp = NULL; 863 struct ipv6_saddr_score hiscore;
835 struct inet6_ifaddr *match = NULL; 864 struct inet6_ifaddr *ifa_result = NULL;
836 struct inet6_dev *idev; 865 int daddr_type = __ipv6_addr_type(daddr);
837 int scope; 866 int daddr_scope = __ipv6_addr_src_scope(daddr_type);
838 int err; 867 u32 daddr_label = ipv6_saddr_label(daddr, daddr_type);
839 int hiscore = -1, score; 868 struct net_device *dev;
840 869
841 scope = ipv6_addr_scope(daddr); 870 memset(&hiscore, 0, sizeof(hiscore));
842 871
843 /* 872 read_lock(&dev_base_lock);
844 * known dev 873 read_lock(&addrconf_lock);
845 * search dev and walk through dev addresses
846 */
847 874
848 if (dev) { 875 for (dev = dev_base; dev; dev=dev->next) {
849 if (dev->flags & IFF_LOOPBACK) 876 struct inet6_dev *idev;
850 scope = IFA_HOST; 877 struct inet6_ifaddr *ifa;
878
879 /* Rule 0: Candidate Source Address (section 4)
880 * - multicast and link-local destination address,
881 * the set of candidate source address MUST only
882 * include addresses assigned to interfaces
883 * belonging to the same link as the outgoing
884 * interface.
885 * (- For site-local destination addresses, the
886 * set of candidate source addresses MUST only
887 * include addresses assigned to interfaces
888 * belonging to the same site as the outgoing
889 * interface.)
890 */
891 if ((daddr_type & IPV6_ADDR_MULTICAST ||
892 daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
893 daddr_dev && dev != daddr_dev)
894 continue;
851 895
852 read_lock(&addrconf_lock);
853 idev = __in6_dev_get(dev); 896 idev = __in6_dev_get(dev);
854 if (idev) { 897 if (!idev)
855 read_lock_bh(&idev->lock); 898 continue;
856 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
857 if (ifp->scope == scope) {
858 if (ifp->flags&IFA_F_TENTATIVE)
859 continue;
860#ifdef CONFIG_IPV6_PRIVACY
861 score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
862#else
863 score = ipv6_saddr_pref(ifp, 0);
864#endif
865 if (score <= hiscore)
866 continue;
867 899
868 if (match) 900 read_lock_bh(&idev->lock);
869 in6_ifa_put(match); 901 for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
870 match = ifp; 902 struct ipv6_saddr_score score;
871 hiscore = score;
872 in6_ifa_hold(ifp);
873 903
874 if (IPV6_GET_SADDR_MAXSCORE(score)) { 904 score.addr_type = __ipv6_addr_type(&ifa->addr);
875 read_unlock_bh(&idev->lock); 905
876 read_unlock(&addrconf_lock); 906 /* Rule 0: Candidate Source Address (section 4)
877 goto out; 907 * - In any case, anycast addresses, multicast
878 } 908 * addresses, and the unspecified address MUST
909 * NOT be included in a candidate set.
910 */
911 if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
912 score.addr_type & IPV6_ADDR_MULTICAST)) {
913 LIMIT_NETDEBUG(KERN_DEBUG
914 "ADDRCONF: unspecified / multicast address"
915 "assigned as unicast address on %s",
916 dev->name);
917 continue;
918 }
919
920 score.attrs = 0;
921 score.matchlen = 0;
922 score.scope = 0;
923 score.rule = 0;
924
925 if (ifa_result == NULL) {
926 /* record it if the first available entry */
927 goto record_it;
928 }
929
930 /* Rule 1: Prefer same address */
931 if (hiscore.rule < 1) {
932 if (ipv6_addr_equal(&ifa_result->addr, daddr))
933 hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL;
934 hiscore.rule++;
935 }
936 if (ipv6_addr_equal(&ifa->addr, daddr)) {
937 score.attrs |= IPV6_SADDR_SCORE_LOCAL;
938 if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) {
939 score.rule = 1;
940 goto record_it;
879 } 941 }
942 } else {
943 if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)
944 continue;
880 } 945 }
881 read_unlock_bh(&idev->lock);
882 }
883 read_unlock(&addrconf_lock);
884 }
885 946
886 if (scope == IFA_LINK) 947 /* Rule 2: Prefer appropriate scope */
887 goto out; 948 if (hiscore.rule < 2) {
949 hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type);
950 hiscore.rule++;
951 }
952 score.scope = __ipv6_addr_src_scope(score.addr_type);
953 if (hiscore.scope < score.scope) {
954 if (hiscore.scope < daddr_scope) {
955 score.rule = 2;
956 goto record_it;
957 } else
958 continue;
959 } else if (score.scope < hiscore.scope) {
960 if (score.scope < daddr_scope)
961 continue;
962 else {
963 score.rule = 2;
964 goto record_it;
965 }
966 }
888 967
889 /* 968 /* Rule 3: Avoid deprecated address */
890 * dev == NULL or search failed for specified dev 969 if (hiscore.rule < 3) {
891 */ 970 if (ipv6_saddr_preferred(hiscore.addr_type) ||
971 !(ifa_result->flags & IFA_F_DEPRECATED))
972 hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
973 hiscore.rule++;
974 }
975 if (ipv6_saddr_preferred(score.addr_type) ||
976 !(ifa->flags & IFA_F_DEPRECATED)) {
977 score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
978 if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
979 score.rule = 3;
980 goto record_it;
981 }
982 } else {
983 if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)
984 continue;
985 }
892 986
893 read_lock(&dev_base_lock); 987 /* Rule 4: Prefer home address -- not implemented yet */
894 read_lock(&addrconf_lock);
895 for (dev = dev_base; dev; dev=dev->next) {
896 idev = __in6_dev_get(dev);
897 if (idev) {
898 read_lock_bh(&idev->lock);
899 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
900 if (ifp->scope == scope) {
901 if (ifp->flags&IFA_F_TENTATIVE)
902 continue;
903#ifdef CONFIG_IPV6_PRIVACY
904 score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
905#else
906 score = ipv6_saddr_pref(ifp, 0);
907#endif
908 if (score <= hiscore)
909 continue;
910 988
911 if (match) 989 /* Rule 5: Prefer outgoing interface */
912 in6_ifa_put(match); 990 if (hiscore.rule < 5) {
913 match = ifp; 991 if (daddr_dev == NULL ||
914 hiscore = score; 992 daddr_dev == ifa_result->idev->dev)
915 in6_ifa_hold(ifp); 993 hiscore.attrs |= IPV6_SADDR_SCORE_OIF;
994 hiscore.rule++;
995 }
996 if (daddr_dev == NULL ||
997 daddr_dev == ifa->idev->dev) {
998 score.attrs |= IPV6_SADDR_SCORE_OIF;
999 if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) {
1000 score.rule = 5;
1001 goto record_it;
1002 }
1003 } else {
1004 if (hiscore.attrs & IPV6_SADDR_SCORE_OIF)
1005 continue;
1006 }
916 1007
917 if (IPV6_GET_SADDR_MAXSCORE(score)) { 1008 /* Rule 6: Prefer matching label */
918 read_unlock_bh(&idev->lock); 1009 if (hiscore.rule < 6) {
919 goto out_unlock_base; 1010 if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label)
920 } 1011 hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
1012 hiscore.rule++;
1013 }
1014 if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) {
1015 score.attrs |= IPV6_SADDR_SCORE_LABEL;
1016 if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
1017 score.rule = 6;
1018 goto record_it;
921 } 1019 }
1020 } else {
1021 if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL)
1022 continue;
922 } 1023 }
923 read_unlock_bh(&idev->lock); 1024
1025#ifdef CONFIG_IPV6_PRIVACY
1026 /* Rule 7: Prefer public address
1027 * Note: prefer temporary address if use_tempaddr >= 2
1028 */
1029 if (hiscore.rule < 7) {
1030 if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^
1031 (ifa_result->idev->cnf.use_tempaddr >= 2))
1032 hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY;
1033 hiscore.rule++;
1034 }
1035 if ((!(ifa->flags & IFA_F_TEMPORARY)) ^
1036 (ifa->idev->cnf.use_tempaddr >= 2)) {
1037 score.attrs |= IPV6_SADDR_SCORE_PRIVACY;
1038 if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) {
1039 score.rule = 7;
1040 goto record_it;
1041 }
1042 } else {
1043 if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
1044 continue;
1045 }
1046#endif
1047 /* Rule 8: Use longest matching prefix */
1048 if (hiscore.rule < 8) {
1049 hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr);
1050 hiscore.rule++;
1051 }
1052 score.matchlen = ipv6_addr_diff(&ifa->addr, daddr);
1053 if (score.matchlen > hiscore.matchlen) {
1054 score.rule = 8;
1055 goto record_it;
1056 }
1057#if 0
1058 else if (score.matchlen < hiscore.matchlen)
1059 continue;
1060#endif
1061
1062 /* Final Rule: choose first available one */
1063 continue;
1064record_it:
1065 if (ifa_result)
1066 in6_ifa_put(ifa_result);
1067 in6_ifa_hold(ifa);
1068 ifa_result = ifa;
1069 hiscore = score;
924 } 1070 }
1071 read_unlock_bh(&idev->lock);
925 } 1072 }
926
927out_unlock_base:
928 read_unlock(&addrconf_lock); 1073 read_unlock(&addrconf_lock);
929 read_unlock(&dev_base_lock); 1074 read_unlock(&dev_base_lock);
930 1075
931out: 1076 if (!ifa_result)
932 err = -EADDRNOTAVAIL; 1077 return -EADDRNOTAVAIL;
933 if (match) { 1078
934 ipv6_addr_copy(saddr, &match->addr); 1079 ipv6_addr_copy(saddr, &ifa_result->addr);
935 err = 0; 1080 in6_ifa_put(ifa_result);
936 in6_ifa_put(match); 1081 return 0;
937 }
938
939 return err;
940} 1082}
941 1083
942 1084
@@ -1217,12 +1359,8 @@ static int __ipv6_regen_rndid(struct inet6_dev *idev)
1217 struct net_device *dev; 1359 struct net_device *dev;
1218 struct scatterlist sg[2]; 1360 struct scatterlist sg[2];
1219 1361
1220 sg[0].page = virt_to_page(idev->entropy); 1362 sg_set_buf(&sg[0], idev->entropy, 8);
1221 sg[0].offset = offset_in_page(idev->entropy); 1363 sg_set_buf(&sg[1], idev->work_eui64, 8);
1222 sg[0].length = 8;
1223 sg[1].page = virt_to_page(idev->work_eui64);
1224 sg[1].offset = offset_in_page(idev->work_eui64);
1225 sg[1].length = 8;
1226 1364
1227 dev = idev->dev; 1365 dev = idev->dev;
1228 1366
@@ -2167,7 +2305,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2167 2305
2168 /* Step 5: netlink notification of this interface */ 2306 /* Step 5: netlink notification of this interface */
2169 idev->tstamp = jiffies; 2307 idev->tstamp = jiffies;
2170 inet6_ifinfo_notify(RTM_NEWLINK, idev); 2308 inet6_ifinfo_notify(RTM_DELLINK, idev);
2171 2309
2172 /* Shot the device (if unregistered) */ 2310 /* Shot the device (if unregistered) */
2173 2311
@@ -2489,7 +2627,7 @@ static void addrconf_verify(unsigned long foo)
2489 for (i=0; i < IN6_ADDR_HSIZE; i++) { 2627 for (i=0; i < IN6_ADDR_HSIZE; i++) {
2490 2628
2491restart: 2629restart:
2492 write_lock(&addrconf_hash_lock); 2630 read_lock(&addrconf_hash_lock);
2493 for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { 2631 for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
2494 unsigned long age; 2632 unsigned long age;
2495#ifdef CONFIG_IPV6_PRIVACY 2633#ifdef CONFIG_IPV6_PRIVACY
@@ -2511,7 +2649,7 @@ restart:
2511 if (age >= ifp->valid_lft) { 2649 if (age >= ifp->valid_lft) {
2512 spin_unlock(&ifp->lock); 2650 spin_unlock(&ifp->lock);
2513 in6_ifa_hold(ifp); 2651 in6_ifa_hold(ifp);
2514 write_unlock(&addrconf_hash_lock); 2652 read_unlock(&addrconf_hash_lock);
2515 ipv6_del_addr(ifp); 2653 ipv6_del_addr(ifp);
2516 goto restart; 2654 goto restart;
2517 } else if (age >= ifp->prefered_lft) { 2655 } else if (age >= ifp->prefered_lft) {
@@ -2530,7 +2668,7 @@ restart:
2530 2668
2531 if (deprecate) { 2669 if (deprecate) {
2532 in6_ifa_hold(ifp); 2670 in6_ifa_hold(ifp);
2533 write_unlock(&addrconf_hash_lock); 2671 read_unlock(&addrconf_hash_lock);
2534 2672
2535 ipv6_ifa_notify(0, ifp); 2673 ipv6_ifa_notify(0, ifp);
2536 in6_ifa_put(ifp); 2674 in6_ifa_put(ifp);
@@ -2548,7 +2686,7 @@ restart:
2548 in6_ifa_hold(ifp); 2686 in6_ifa_hold(ifp);
2549 in6_ifa_hold(ifpub); 2687 in6_ifa_hold(ifpub);
2550 spin_unlock(&ifp->lock); 2688 spin_unlock(&ifp->lock);
2551 write_unlock(&addrconf_hash_lock); 2689 read_unlock(&addrconf_hash_lock);
2552 ipv6_create_tempaddr(ifpub, ifp); 2690 ipv6_create_tempaddr(ifpub, ifp);
2553 in6_ifa_put(ifpub); 2691 in6_ifa_put(ifpub);
2554 in6_ifa_put(ifp); 2692 in6_ifa_put(ifp);
@@ -2565,7 +2703,7 @@ restart:
2565 spin_unlock(&ifp->lock); 2703 spin_unlock(&ifp->lock);
2566 } 2704 }
2567 } 2705 }
2568 write_unlock(&addrconf_hash_lock); 2706 read_unlock(&addrconf_hash_lock);
2569 } 2707 }
2570 2708
2571 addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; 2709 addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
@@ -2954,8 +3092,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2954 3092
2955nlmsg_failure: 3093nlmsg_failure:
2956rtattr_failure: 3094rtattr_failure:
2957 if (array) 3095 kfree(array);
2958 kfree(array);
2959 skb_trim(skb, b - skb->data); 3096 skb_trim(skb, b - skb->data);
2960 return -1; 3097 return -1;
2961} 3098}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4f8795af2edb..c63b8ce0e1b5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -699,12 +699,14 @@ static int __init inet6_init(void)
699 /* Register the family here so that the init calls below will 699 /* Register the family here so that the init calls below will
700 * be able to create sockets. (?? is this dangerous ??) 700 * be able to create sockets. (?? is this dangerous ??)
701 */ 701 */
702 (void) sock_register(&inet6_family_ops); 702 err = sock_register(&inet6_family_ops);
703 if (err)
704 goto out_unregister_raw_proto;
703 705
704 /* Initialise ipv6 mibs */ 706 /* Initialise ipv6 mibs */
705 err = init_ipv6_mibs(); 707 err = init_ipv6_mibs();
706 if (err) 708 if (err)
707 goto out_unregister_raw_proto; 709 goto out_unregister_sock;
708 710
709 /* 711 /*
710 * ipngwg API draft makes clear that the correct semantics 712 * ipngwg API draft makes clear that the correct semantics
@@ -796,6 +798,8 @@ icmp_fail:
796 ipv6_sysctl_unregister(); 798 ipv6_sysctl_unregister();
797#endif 799#endif
798 cleanup_ipv6_mibs(); 800 cleanup_ipv6_mibs();
801out_unregister_sock:
802 sock_unregister(PF_INET6);
799out_unregister_raw_proto: 803out_unregister_raw_proto:
800 proto_unregister(&rawv6_prot); 804 proto_unregister(&rawv6_prot);
801out_unregister_udp_proto: 805out_unregister_udp_proto:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b7185fb3377c..1bdf0fb8bf8a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -585,17 +585,16 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
585 daddr = &skb->nh.ipv6h->daddr; 585 daddr = &skb->nh.ipv6h->daddr;
586 586
587 /* Perform checksum. */ 587 /* Perform checksum. */
588 if (skb->ip_summed == CHECKSUM_HW) { 588 switch (skb->ip_summed) {
589 skb->ip_summed = CHECKSUM_UNNECESSARY; 589 case CHECKSUM_HW:
590 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 590 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
591 skb->csum)) { 591 skb->csum))
592 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); 592 break;
593 skb->ip_summed = CHECKSUM_NONE; 593 /* fall through */
594 } 594 case CHECKSUM_NONE:
595 } 595 skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len,
596 if (skb->ip_summed == CHECKSUM_NONE) { 596 IPPROTO_ICMPV6, 0);
597 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 597 if (__skb_checksum_complete(skb)) {
598 skb_checksum(skb, 0, skb->len, 0))) {
599 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", 598 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
600 NIP6(*saddr), NIP6(*daddr)); 599 NIP6(*saddr), NIP6(*daddr));
601 goto discard_it; 600 goto discard_it;
@@ -700,10 +699,7 @@ int __init icmpv6_init(struct net_proto_family *ops)
700 struct sock *sk; 699 struct sock *sk;
701 int err, i, j; 700 int err, i, j;
702 701
703 for (i = 0; i < NR_CPUS; i++) { 702 for_each_cpu(i) {
704 if (!cpu_possible(i))
705 continue;
706
707 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, 703 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
708 &per_cpu(__icmpv6_socket, i)); 704 &per_cpu(__icmpv6_socket, i));
709 if (err < 0) { 705 if (err < 0) {
@@ -749,9 +745,7 @@ void icmpv6_cleanup(void)
749{ 745{
750 int i; 746 int i;
751 747
752 for (i = 0; i < NR_CPUS; i++) { 748 for_each_cpu(i) {
753 if (!cpu_possible(i))
754 continue;
755 sock_release(per_cpu(__icmpv6_socket, i)); 749 sock_release(per_cpu(__icmpv6_socket, i));
756 } 750 }
757 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); 751 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4fcc5a7acf6e..1bf6d9a769e6 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -127,56 +127,6 @@ static __inline__ int addr_bit_set(void *token, int fn_bit)
127 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; 127 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
128} 128}
129 129
130/*
131 * find the first different bit between two addresses
132 * length of address must be a multiple of 32bits
133 */
134
135static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
136{
137 __u32 *a1 = token1;
138 __u32 *a2 = token2;
139 int i;
140
141 addrlen >>= 2;
142
143 for (i = 0; i < addrlen; i++) {
144 __u32 xb;
145
146 xb = a1[i] ^ a2[i];
147
148 if (xb) {
149 int j = 31;
150
151 xb = ntohl(xb);
152
153 while ((xb & (1 << j)) == 0)
154 j--;
155
156 return (i * 32 + 31 - j);
157 }
158 }
159
160 /*
161 * we should *never* get to this point since that
162 * would mean the addrs are equal
163 *
164 * However, we do get to it 8) And exacly, when
165 * addresses are equal 8)
166 *
167 * ip route add 1111::/128 via ...
168 * ip route add 1111::/64 via ...
169 * and we are here.
170 *
171 * Ideally, this function should stop comparison
172 * at prefix length. It does not, but it is still OK,
173 * if returned value is greater than prefix length.
174 * --ANK (980803)
175 */
176
177 return addrlen<<5;
178}
179
180static __inline__ struct fib6_node * node_alloc(void) 130static __inline__ struct fib6_node * node_alloc(void)
181{ 131{
182 struct fib6_node *fn; 132 struct fib6_node *fn;
@@ -296,11 +246,11 @@ insert_above:
296 246
297 /* find 1st bit in difference between the 2 addrs. 247 /* find 1st bit in difference between the 2 addrs.
298 248
299 See comment in addr_diff: bit may be an invalid value, 249 See comment in __ipv6_addr_diff: bit may be an invalid value,
300 but if it is >= plen, the value is ignored in any case. 250 but if it is >= plen, the value is ignored in any case.
301 */ 251 */
302 252
303 bit = addr_diff(addr, &key->addr, addrlen); 253 bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
304 254
305 /* 255 /*
306 * (intermediate)[in] 256 * (intermediate)[in]
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6e3480426939..a6026d2787d2 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -176,6 +176,11 @@ resubmit:
176 if (ipprot->flags & INET6_PROTO_FINAL) { 176 if (ipprot->flags & INET6_PROTO_FINAL) {
177 struct ipv6hdr *hdr; 177 struct ipv6hdr *hdr;
178 178
179 /* Free reference early: we don't need it any more,
180 and it may hold ip_conntrack module loaded
181 indefinitely. */
182 nf_reset(skb);
183
179 skb_postpull_rcsum(skb, skb->nh.raw, 184 skb_postpull_rcsum(skb, skb->nh.raw,
180 skb->h.raw - skb->nh.raw); 185 skb->h.raw - skb->nh.raw);
181 hdr = skb->nh.ipv6h; 186 hdr = skb->nh.ipv6h;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 563b442ffab8..c1fa693511a1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -147,7 +147,8 @@ static int ip6_output2(struct sk_buff *skb)
147 147
148int ip6_output(struct sk_buff *skb) 148int ip6_output(struct sk_buff *skb)
149{ 149{
150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst)) 150 if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
151 dst_allfrag(skb->dst))
151 return ip6_fragment(skb, ip6_output2); 152 return ip6_fragment(skb, ip6_output2);
152 else 153 else
153 return ip6_output2(skb); 154 return ip6_output2(skb);
@@ -440,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
440#ifdef CONFIG_NETFILTER 441#ifdef CONFIG_NETFILTER
441 to->nfmark = from->nfmark; 442 to->nfmark = from->nfmark;
442 /* Connection association is same as pre-frag packet */ 443 /* Connection association is same as pre-frag packet */
444 nf_conntrack_put(to->nfct);
443 to->nfct = from->nfct; 445 to->nfct = from->nfct;
444 nf_conntrack_get(to->nfct); 446 nf_conntrack_get(to->nfct);
445 to->nfctinfo = from->nfctinfo; 447 to->nfctinfo = from->nfctinfo;
448#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
449 nf_conntrack_put_reasm(to->nfct_reasm);
450 to->nfct_reasm = from->nfct_reasm;
451 nf_conntrack_get_reasm(to->nfct_reasm);
452#endif
446#ifdef CONFIG_BRIDGE_NETFILTER 453#ifdef CONFIG_BRIDGE_NETFILTER
447 nf_bridge_put(to->nf_bridge); 454 nf_bridge_put(to->nf_bridge);
448 to->nf_bridge = from->nf_bridge; 455 to->nf_bridge = from->nf_bridge;
@@ -586,8 +593,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
586 skb->next = NULL; 593 skb->next = NULL;
587 } 594 }
588 595
589 if (tmp_hdr) 596 kfree(tmp_hdr);
590 kfree(tmp_hdr);
591 597
592 if (err == 0) { 598 if (err == 0) {
593 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); 599 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
@@ -768,6 +774,65 @@ out_err_release:
768 *dst = NULL; 774 *dst = NULL;
769 return err; 775 return err;
770} 776}
777inline int ip6_ufo_append_data(struct sock *sk,
778 int getfrag(void *from, char *to, int offset, int len,
779 int odd, struct sk_buff *skb),
780 void *from, int length, int hh_len, int fragheaderlen,
781 int transhdrlen, int mtu,unsigned int flags)
782
783{
784 struct sk_buff *skb;
785 int err;
786
787 /* There is support for UDP large send offload by network
788 * device, so create one single skb packet containing complete
789 * udp datagram
790 */
791 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
792 skb = sock_alloc_send_skb(sk,
793 hh_len + fragheaderlen + transhdrlen + 20,
794 (flags & MSG_DONTWAIT), &err);
795 if (skb == NULL)
796 return -ENOMEM;
797
798 /* reserve space for Hardware header */
799 skb_reserve(skb, hh_len);
800
801 /* create space for UDP/IP header */
802 skb_put(skb,fragheaderlen + transhdrlen);
803
804 /* initialize network header pointer */
805 skb->nh.raw = skb->data;
806
807 /* initialize protocol header pointer */
808 skb->h.raw = skb->data + fragheaderlen;
809
810 skb->ip_summed = CHECKSUM_HW;
811 skb->csum = 0;
812 sk->sk_sndmsg_off = 0;
813 }
814
815 err = skb_append_datato_frags(sk,skb, getfrag, from,
816 (length - transhdrlen));
817 if (!err) {
818 struct frag_hdr fhdr;
819
820 /* specify the length of each IP datagram fragment*/
821 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
822 sizeof(struct frag_hdr);
823 ipv6_select_ident(skb, &fhdr);
824 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
825 __skb_queue_tail(&sk->sk_write_queue, skb);
826
827 return 0;
828 }
829 /* There is not enough support to do UDP LSO,
830 * so follow normal path
831 */
832 kfree_skb(skb);
833
834 return err;
835}
771 836
772int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 837int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
773 int offset, int len, int odd, struct sk_buff *skb), 838 int offset, int len, int odd, struct sk_buff *skb),
@@ -860,6 +925,15 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
860 */ 925 */
861 926
862 inet->cork.length += length; 927 inet->cork.length += length;
928 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
929 (rt->u.dst.dev->features & NETIF_F_UFO)) {
930
931 if(ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
932 fragheaderlen, transhdrlen, mtu, flags))
933 goto error;
934
935 return 0;
936 }
863 937
864 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 938 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
865 goto alloc_new_skb; 939 goto alloc_new_skb;
@@ -1117,10 +1191,8 @@ int ip6_push_pending_frames(struct sock *sk)
1117 1191
1118out: 1192out:
1119 inet->cork.flags &= ~IPCORK_OPT; 1193 inet->cork.flags &= ~IPCORK_OPT;
1120 if (np->cork.opt) { 1194 kfree(np->cork.opt);
1121 kfree(np->cork.opt); 1195 np->cork.opt = NULL;
1122 np->cork.opt = NULL;
1123 }
1124 if (np->cork.rt) { 1196 if (np->cork.rt) {
1125 dst_release(&np->cork.rt->u.dst); 1197 dst_release(&np->cork.rt->u.dst);
1126 np->cork.rt = NULL; 1198 np->cork.rt = NULL;
@@ -1145,10 +1217,8 @@ void ip6_flush_pending_frames(struct sock *sk)
1145 1217
1146 inet->cork.flags &= ~IPCORK_OPT; 1218 inet->cork.flags &= ~IPCORK_OPT;
1147 1219
1148 if (np->cork.opt) { 1220 kfree(np->cork.opt);
1149 kfree(np->cork.opt); 1221 np->cork.opt = NULL;
1150 np->cork.opt = NULL;
1151 }
1152 if (np->cork.rt) { 1222 if (np->cork.rt) {
1153 dst_release(&np->cork.rt->u.dst); 1223 dst_release(&np->cork.rt->u.dst);
1154 np->cork.rt = NULL; 1224 np->cork.rt = NULL;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cf94372d1af3..e315d0f80af1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -525,6 +525,7 @@ ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
525 525
526 if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) { 526 if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
527 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 527 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
528 read_unlock(&ip6ip6_lock);
528 kfree_skb(skb); 529 kfree_skb(skb);
529 return 0; 530 return 0;
530 } 531 }
@@ -756,8 +757,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
756 } 757 }
757 ip6_tnl_dst_store(t, dst); 758 ip6_tnl_dst_store(t, dst);
758 759
759 if (opt) 760 kfree(opt);
760 kfree(opt);
761 761
762 t->recursion--; 762 t->recursion--;
763 return 0; 763 return 0;
@@ -766,8 +766,7 @@ tx_err_link_failure:
766 dst_link_failure(skb); 766 dst_link_failure(skb);
767tx_err_dst_release: 767tx_err_dst_release:
768 dst_release(dst); 768 dst_release(dst);
769 if (opt) 769 kfree(opt);
770 kfree(opt);
771tx_err: 770tx_err:
772 stats->tx_errors++; 771 stats->tx_errors++;
773 stats->tx_dropped++; 772 stats->tx_dropped++;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 85bfbc69b2c3..55917fb17094 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -130,8 +130,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, s
130out_put_cpu: 130out_put_cpu:
131 put_cpu(); 131 put_cpu();
132out: 132out:
133 if (tmp_hdr) 133 kfree(tmp_hdr);
134 kfree(tmp_hdr);
135 if (err) 134 if (err)
136 goto error_out; 135 goto error_out;
137 return nexthdr; 136 return nexthdr;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 8567873d0dd8..25757ade989f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -80,8 +80,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
80 if (ra->sk == sk) { 80 if (ra->sk == sk) {
81 if (sel>=0) { 81 if (sel>=0) {
82 write_unlock_bh(&ip6_ra_lock); 82 write_unlock_bh(&ip6_ra_lock);
83 if (new_ra) 83 kfree(new_ra);
84 kfree(new_ra);
85 return -EADDRINUSE; 84 return -EADDRINUSE;
86 } 85 }
87 86
@@ -288,7 +287,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
288 { 287 {
289 struct ipv6_txoptions *opt; 288 struct ipv6_txoptions *opt;
290 if (optlen == 0) 289 if (optlen == 0)
291 optval = 0; 290 optval = NULL;
292 291
293 /* hop-by-hop / destination options are privileged option */ 292 /* hop-by-hop / destination options are privileged option */
294 retv = -EPERM; 293 retv = -EPERM;
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index 37a4a99c9fe9..16482785bdfd 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -7,7 +7,7 @@
7#include <net/ip6_route.h> 7#include <net/ip6_route.h>
8#include <net/xfrm.h> 8#include <net/xfrm.h>
9 9
10EXPORT_SYMBOL(ipv6_addr_type); 10EXPORT_SYMBOL(__ipv6_addr_type);
11EXPORT_SYMBOL(icmpv6_send); 11EXPORT_SYMBOL(icmpv6_send);
12EXPORT_SYMBOL(icmpv6_statistics); 12EXPORT_SYMBOL(icmpv6_statistics);
13EXPORT_SYMBOL(icmpv6_err_convert); 13EXPORT_SYMBOL(icmpv6_err_convert);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 39a96c768102..f15e04ad026e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -164,7 +164,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
164#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) 164#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
165#define MLDV2_EXP(thresh, nbmant, nbexp, value) \ 165#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
166 ((value) < (thresh) ? (value) : \ 166 ((value) < (thresh) ? (value) : \
167 ((MLDV2_MASK(value, nbmant) | (1<<(nbmant+nbexp))) << \ 167 ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
168 (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) 168 (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
169 169
170#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value) 170#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
@@ -545,8 +545,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
545 sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); 545 sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
546 goto done; 546 goto done;
547 } 547 }
548 } else 548 } else {
549 newpsl = NULL; 549 newpsl = NULL;
550 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
551 }
550 psl = pmc->sflist; 552 psl = pmc->sflist;
551 if (psl) { 553 if (psl) {
552 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 554 (void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -1087,7 +1089,7 @@ static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1087 1089
1088int igmp6_event_query(struct sk_buff *skb) 1090int igmp6_event_query(struct sk_buff *skb)
1089{ 1091{
1090 struct mld2_query *mlh2 = (struct mld2_query *) skb->h.raw; 1092 struct mld2_query *mlh2 = NULL;
1091 struct ifmcaddr6 *ma; 1093 struct ifmcaddr6 *ma;
1092 struct in6_addr *group; 1094 struct in6_addr *group;
1093 unsigned long max_delay; 1095 unsigned long max_delay;
@@ -1140,6 +1142,13 @@ int igmp6_event_query(struct sk_buff *skb)
1140 /* clear deleted report items */ 1142 /* clear deleted report items */
1141 mld_clear_delrec(idev); 1143 mld_clear_delrec(idev);
1142 } else if (len >= 28) { 1144 } else if (len >= 28) {
1145 int srcs_offset = sizeof(struct mld2_query) -
1146 sizeof(struct icmp6hdr);
1147 if (!pskb_may_pull(skb, srcs_offset)) {
1148 in6_dev_put(idev);
1149 return -EINVAL;
1150 }
1151 mlh2 = (struct mld2_query *) skb->h.raw;
1143 max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; 1152 max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
1144 if (!max_delay) 1153 if (!max_delay)
1145 max_delay = 1; 1154 max_delay = 1;
@@ -1156,7 +1165,15 @@ int igmp6_event_query(struct sk_buff *skb)
1156 return 0; 1165 return 0;
1157 } 1166 }
1158 /* mark sources to include, if group & source-specific */ 1167 /* mark sources to include, if group & source-specific */
1159 mark = mlh2->nsrcs != 0; 1168 if (mlh2->nsrcs != 0) {
1169 if (!pskb_may_pull(skb, srcs_offset +
1170 mlh2->nsrcs * sizeof(struct in6_addr))) {
1171 in6_dev_put(idev);
1172 return -EINVAL;
1173 }
1174 mlh2 = (struct mld2_query *) skb->h.raw;
1175 mark = 1;
1176 }
1160 } else { 1177 } else {
1161 in6_dev_put(idev); 1178 in6_dev_put(idev);
1162 return -EINVAL; 1179 return -EINVAL;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index bb7ccfe33f23..060d61202412 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,10 +5,20 @@
5menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" 5menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
6 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL 6 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
7 7
8#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK 8config NF_CONNTRACK_IPV6
9#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then 9 tristate "IPv6 support for new connection tracking (EXPERIMENTAL)"
10# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK 10 depends on EXPERIMENTAL && NF_CONNTRACK
11#fi 11 ---help---
12 Connection tracking keeps a record of what packets have passed
13 through your machine, in order to figure out how they are related
14 into connections.
15
16 This is IPv6 support on Layer 3 independent connection tracking.
17 Layer 3 independent connection tracking is experimental scheme
18 which generalize ip_conntrack to support other layer 3 protocols.
19
20 To compile it as a module, choose M here. If unsure, say N.
21
12config IP6_NF_QUEUE 22config IP6_NF_QUEUE
13 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" 23 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
14 ---help--- 24 ---help---
@@ -114,7 +124,6 @@ config IP6_NF_MATCH_OWNER
114 124
115 To compile it as a module, choose M here. If unsure, say N. 125 To compile it as a module, choose M here. If unsure, say N.
116 126
117# dep_tristate ' MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES
118config IP6_NF_MATCH_MARK 127config IP6_NF_MATCH_MARK
119 tristate "netfilter MARK match support" 128 tristate "netfilter MARK match support"
120 depends on IP6_NF_IPTABLES 129 depends on IP6_NF_IPTABLES
@@ -170,15 +179,6 @@ config IP6_NF_MATCH_PHYSDEV
170 179
171 To compile it as a module, choose M here. If unsure, say N. 180 To compile it as a module, choose M here. If unsure, say N.
172 181
173# dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES
174# dep_tristate ' TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES
175# if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
176# dep_tristate ' Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES
177# fi
178# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
179# dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES
180# dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES
181# fi
182# The targets 182# The targets
183config IP6_NF_FILTER 183config IP6_NF_FILTER
184 tristate "Packet filtering" 184 tristate "Packet filtering"
@@ -220,12 +220,6 @@ config IP6_NF_TARGET_NFQUEUE
220 220
221 To compile it as a module, choose M here. If unsure, say N. 221 To compile it as a module, choose M here. If unsure, say N.
222 222
223# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
224# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
225# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
226# dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER
227# fi
228# fi
229config IP6_NF_MANGLE 223config IP6_NF_MANGLE
230 tristate "Packet mangling" 224 tristate "Packet mangling"
231 depends on IP6_NF_IPTABLES 225 depends on IP6_NF_IPTABLES
@@ -236,7 +230,6 @@ config IP6_NF_MANGLE
236 230
237 To compile it as a module, choose M here. If unsure, say N. 231 To compile it as a module, choose M here. If unsure, say N.
238 232
239# dep_tristate ' TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE
240config IP6_NF_TARGET_MARK 233config IP6_NF_TARGET_MARK
241 tristate "MARK target support" 234 tristate "MARK target support"
242 depends on IP6_NF_MANGLE 235 depends on IP6_NF_MANGLE
@@ -266,7 +259,6 @@ config IP6_NF_TARGET_HL
266 259
267 To compile it as a module, choose M here. If unsure, say N. 260 To compile it as a module, choose M here. If unsure, say N.
268 261
269#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
270config IP6_NF_RAW 262config IP6_NF_RAW
271 tristate 'raw table support (required for TRACE)' 263 tristate 'raw table support (required for TRACE)'
272 depends on IP6_NF_IPTABLES 264 depends on IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b2c370e8b1c..9ab5b2ca1f59 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
27obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 27obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
28obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o 28obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
29obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 29obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
30
31# objects for l3 independent conntrack
32nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
33
34# l3 independent conntrack
35obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 21deec25a12b..7d492226c16e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2,7 +2,7 @@
2 * Packet matching code. 2 * Packet matching code.
3 * 3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org> 5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
@@ -23,7 +23,6 @@
23#include <linux/tcp.h> 23#include <linux/tcp.h>
24#include <linux/udp.h> 24#include <linux/udp.h>
25#include <linux/icmpv6.h> 25#include <linux/icmpv6.h>
26#include <net/ip.h>
27#include <net/ipv6.h> 26#include <net/ipv6.h>
28#include <asm/uaccess.h> 27#include <asm/uaccess.h>
29#include <asm/semaphore.h> 28#include <asm/semaphore.h>
@@ -80,13 +79,12 @@ static DECLARE_MUTEX(ip6t_mutex);
80#define inline 79#define inline
81#endif 80#endif
82 81
83/* Locking is simple: we assume at worst case there will be one packet 82/*
84 in user context and one from bottom halves (or soft irq if Alexey's
85 softnet patch was applied).
86
87 We keep a set of rules for each CPU, so we can avoid write-locking 83 We keep a set of rules for each CPU, so we can avoid write-locking
88 them; doing a readlock_bh() stops packets coming through if we're 84 them in the softirq when updating the counters and therefore
89 in user context. 85 only need to read-lock in the softirq; doing a write_lock_bh() in user
86 context stops packets coming through and allows user context to read
87 the counters or update the rules.
90 88
91 To be cache friendly on SMP, we arrange them like so: 89 To be cache friendly on SMP, we arrange them like so:
92 [ n-entries ] 90 [ n-entries ]
@@ -356,7 +354,7 @@ ip6t_do_table(struct sk_buff **pskb,
356 struct ip6t_table *table, 354 struct ip6t_table *table,
357 void *userdata) 355 void *userdata)
358{ 356{
359 static const char nulldevname[IFNAMSIZ]; 357 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
360 int offset = 0; 358 int offset = 0;
361 unsigned int protoff = 0; 359 unsigned int protoff = 0;
362 int hotdrop = 0; 360 int hotdrop = 0;
@@ -369,7 +367,6 @@ ip6t_do_table(struct sk_buff **pskb,
369 /* Initialization */ 367 /* Initialization */
370 indev = in ? in->name : nulldevname; 368 indev = in ? in->name : nulldevname;
371 outdev = out ? out->name : nulldevname; 369 outdev = out ? out->name : nulldevname;
372
373 /* We handle fragments by dealing with the first fragment as 370 /* We handle fragments by dealing with the first fragment as
374 * if it was a normal packet. All other fragments are treated 371 * if it was a normal packet. All other fragments are treated
375 * normally, except that they will NEVER match rules that ask 372 * normally, except that they will NEVER match rules that ask
@@ -497,75 +494,145 @@ ip6t_do_table(struct sk_buff **pskb,
497#endif 494#endif
498} 495}
499 496
500/* If it succeeds, returns element and locks mutex */ 497/*
501static inline void * 498 * These are weird, but module loading must not be done with mutex
502find_inlist_lock_noload(struct list_head *head, 499 * held (since they will register), and we have to have a single
503 const char *name, 500 * function to use try_then_request_module().
504 int *error, 501 */
505 struct semaphore *mutex) 502
503/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
504static inline struct ip6t_table *find_table_lock(const char *name)
506{ 505{
507 void *ret; 506 struct ip6t_table *t;
508 507
509#if 1 508 if (down_interruptible(&ip6t_mutex) != 0)
510 duprintf("find_inlist: searching for `%s' in %s.\n", 509 return ERR_PTR(-EINTR);
511 name, head == &ip6t_target ? "ip6t_target"
512 : head == &ip6t_match ? "ip6t_match"
513 : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
514#endif
515 510
516 *error = down_interruptible(mutex); 511 list_for_each_entry(t, &ip6t_tables, list)
517 if (*error != 0) 512 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
518 return NULL; 513 return t;
514 up(&ip6t_mutex);
515 return NULL;
516}
517
518/* Find match, grabs ref. Returns ERR_PTR() on error. */
519static inline struct ip6t_match *find_match(const char *name, u8 revision)
520{
521 struct ip6t_match *m;
522 int err = 0;
519 523
520 ret = list_named_find(head, name); 524 if (down_interruptible(&ip6t_mutex) != 0)
521 if (!ret) { 525 return ERR_PTR(-EINTR);
522 *error = -ENOENT; 526
523 up(mutex); 527 list_for_each_entry(m, &ip6t_match, list) {
528 if (strcmp(m->name, name) == 0) {
529 if (m->revision == revision) {
530 if (try_module_get(m->me)) {
531 up(&ip6t_mutex);
532 return m;
533 }
534 } else
535 err = -EPROTOTYPE; /* Found something. */
536 }
524 } 537 }
525 return ret; 538 up(&ip6t_mutex);
539 return ERR_PTR(err);
526} 540}
527 541
528#ifndef CONFIG_KMOD 542/* Find target, grabs ref. Returns ERR_PTR() on error. */
529#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) 543static inline struct ip6t_target *find_target(const char *name, u8 revision)
530#else
531static void *
532find_inlist_lock(struct list_head *head,
533 const char *name,
534 const char *prefix,
535 int *error,
536 struct semaphore *mutex)
537{ 544{
538 void *ret; 545 struct ip6t_target *t;
546 int err = 0;
539 547
540 ret = find_inlist_lock_noload(head, name, error, mutex); 548 if (down_interruptible(&ip6t_mutex) != 0)
541 if (!ret) { 549 return ERR_PTR(-EINTR);
542 duprintf("find_inlist: loading `%s%s'.\n", prefix, name); 550
543 request_module("%s%s", prefix, name); 551 list_for_each_entry(t, &ip6t_target, list) {
544 ret = find_inlist_lock_noload(head, name, error, mutex); 552 if (strcmp(t->name, name) == 0) {
553 if (t->revision == revision) {
554 if (try_module_get(t->me)) {
555 up(&ip6t_mutex);
556 return t;
557 }
558 } else
559 err = -EPROTOTYPE; /* Found something. */
560 }
545 } 561 }
562 up(&ip6t_mutex);
563 return ERR_PTR(err);
564}
546 565
547 return ret; 566struct ip6t_target *ip6t_find_target(const char *name, u8 revision)
567{
568 struct ip6t_target *target;
569
570 target = try_then_request_module(find_target(name, revision),
571 "ip6t_%s", name);
572 if (IS_ERR(target) || !target)
573 return NULL;
574 return target;
548} 575}
549#endif
550 576
551static inline struct ip6t_table * 577static int match_revfn(const char *name, u8 revision, int *bestp)
552ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
553{ 578{
554 return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex); 579 struct ip6t_match *m;
580 int have_rev = 0;
581
582 list_for_each_entry(m, &ip6t_match, list) {
583 if (strcmp(m->name, name) == 0) {
584 if (m->revision > *bestp)
585 *bestp = m->revision;
586 if (m->revision == revision)
587 have_rev = 1;
588 }
589 }
590 return have_rev;
555} 591}
556 592
557static inline struct ip6t_match * 593static int target_revfn(const char *name, u8 revision, int *bestp)
558find_match_lock(const char *name, int *error, struct semaphore *mutex)
559{ 594{
560 return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex); 595 struct ip6t_target *t;
596 int have_rev = 0;
597
598 list_for_each_entry(t, &ip6t_target, list) {
599 if (strcmp(t->name, name) == 0) {
600 if (t->revision > *bestp)
601 *bestp = t->revision;
602 if (t->revision == revision)
603 have_rev = 1;
604 }
605 }
606 return have_rev;
561} 607}
562 608
563static struct ip6t_target * 609/* Returns true or false (if no such extension at all) */
564ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex) 610static inline int find_revision(const char *name, u8 revision,
611 int (*revfn)(const char *, u8, int *),
612 int *err)
565{ 613{
566 return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex); 614 int have_rev, best = -1;
615
616 if (down_interruptible(&ip6t_mutex) != 0) {
617 *err = -EINTR;
618 return 1;
619 }
620 have_rev = revfn(name, revision, &best);
621 up(&ip6t_mutex);
622
623 /* Nothing at all? Return 0 to try loading module. */
624 if (best == -1) {
625 *err = -ENOENT;
626 return 0;
627 }
628
629 *err = best;
630 if (!have_rev)
631 *err = -EPROTONOSUPPORT;
632 return 1;
567} 633}
568 634
635
569/* All zeroes == unconditional rule. */ 636/* All zeroes == unconditional rule. */
570static inline int 637static inline int
571unconditional(const struct ip6t_ip6 *ipv6) 638unconditional(const struct ip6t_ip6 *ipv6)
@@ -725,20 +792,16 @@ check_match(struct ip6t_entry_match *m,
725 unsigned int hookmask, 792 unsigned int hookmask,
726 unsigned int *i) 793 unsigned int *i)
727{ 794{
728 int ret;
729 struct ip6t_match *match; 795 struct ip6t_match *match;
730 796
731 match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex); 797 match = try_then_request_module(find_match(m->u.user.name,
732 if (!match) { 798 m->u.user.revision),
733 // duprintf("check_match: `%s' not found\n", m->u.name); 799 "ip6t_%s", m->u.user.name);
734 return ret; 800 if (IS_ERR(match) || !match) {
735 } 801 duprintf("check_match: `%s' not found\n", m->u.user.name);
736 if (!try_module_get(match->me)) { 802 return match ? PTR_ERR(match) : -ENOENT;
737 up(&ip6t_mutex);
738 return -ENOENT;
739 } 803 }
740 m->u.kernel.match = match; 804 m->u.kernel.match = match;
741 up(&ip6t_mutex);
742 805
743 if (m->u.kernel.match->checkentry 806 if (m->u.kernel.match->checkentry
744 && !m->u.kernel.match->checkentry(name, ipv6, m->data, 807 && !m->u.kernel.match->checkentry(name, ipv6, m->data,
@@ -776,22 +839,16 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
776 goto cleanup_matches; 839 goto cleanup_matches;
777 840
778 t = ip6t_get_target(e); 841 t = ip6t_get_target(e);
779 target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex); 842 target = try_then_request_module(find_target(t->u.user.name,
780 if (!target) { 843 t->u.user.revision),
844 "ip6t_%s", t->u.user.name);
845 if (IS_ERR(target) || !target) {
781 duprintf("check_entry: `%s' not found\n", t->u.user.name); 846 duprintf("check_entry: `%s' not found\n", t->u.user.name);
782 goto cleanup_matches; 847 ret = target ? PTR_ERR(target) : -ENOENT;
783 }
784 if (!try_module_get(target->me)) {
785 up(&ip6t_mutex);
786 ret = -ENOENT;
787 goto cleanup_matches; 848 goto cleanup_matches;
788 } 849 }
789 t->u.kernel.target = target; 850 t->u.kernel.target = target;
790 up(&ip6t_mutex); 851
791 if (!t->u.kernel.target) {
792 ret = -EBUSY;
793 goto cleanup_matches;
794 }
795 if (t->u.kernel.target == &ip6t_standard_target) { 852 if (t->u.kernel.target == &ip6t_standard_target) {
796 if (!standard_check(t, size)) { 853 if (!standard_check(t, size)) {
797 ret = -EINVAL; 854 ret = -EINVAL;
@@ -1118,8 +1175,8 @@ get_entries(const struct ip6t_get_entries *entries,
1118 int ret; 1175 int ret;
1119 struct ip6t_table *t; 1176 struct ip6t_table *t;
1120 1177
1121 t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex); 1178 t = find_table_lock(entries->name);
1122 if (t) { 1179 if (t && !IS_ERR(t)) {
1123 duprintf("t->private->number = %u\n", 1180 duprintf("t->private->number = %u\n",
1124 t->private->number); 1181 t->private->number);
1125 if (entries->size == t->private->size) 1182 if (entries->size == t->private->size)
@@ -1131,10 +1188,10 @@ get_entries(const struct ip6t_get_entries *entries,
1131 entries->size); 1188 entries->size);
1132 ret = -EINVAL; 1189 ret = -EINVAL;
1133 } 1190 }
1191 module_put(t->me);
1134 up(&ip6t_mutex); 1192 up(&ip6t_mutex);
1135 } else 1193 } else
1136 duprintf("get_entries: Can't find %s!\n", 1194 ret = t ? PTR_ERR(t) : -ENOENT;
1137 entries->name);
1138 1195
1139 return ret; 1196 return ret;
1140} 1197}
@@ -1182,22 +1239,19 @@ do_replace(void __user *user, unsigned int len)
1182 1239
1183 duprintf("ip_tables: Translated table\n"); 1240 duprintf("ip_tables: Translated table\n");
1184 1241
1185 t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex); 1242 t = try_then_request_module(find_table_lock(tmp.name),
1186 if (!t) 1243 "ip6table_%s", tmp.name);
1244 if (!t || IS_ERR(t)) {
1245 ret = t ? PTR_ERR(t) : -ENOENT;
1187 goto free_newinfo_counters_untrans; 1246 goto free_newinfo_counters_untrans;
1247 }
1188 1248
1189 /* You lied! */ 1249 /* You lied! */
1190 if (tmp.valid_hooks != t->valid_hooks) { 1250 if (tmp.valid_hooks != t->valid_hooks) {
1191 duprintf("Valid hook crap: %08X vs %08X\n", 1251 duprintf("Valid hook crap: %08X vs %08X\n",
1192 tmp.valid_hooks, t->valid_hooks); 1252 tmp.valid_hooks, t->valid_hooks);
1193 ret = -EINVAL; 1253 ret = -EINVAL;
1194 goto free_newinfo_counters_untrans_unlock; 1254 goto put_module;
1195 }
1196
1197 /* Get a reference in advance, we're not allowed fail later */
1198 if (!try_module_get(t->me)) {
1199 ret = -EBUSY;
1200 goto free_newinfo_counters_untrans_unlock;
1201 } 1255 }
1202 1256
1203 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); 1257 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
@@ -1219,7 +1273,6 @@ do_replace(void __user *user, unsigned int len)
1219 /* Decrease module usage counts and free resource */ 1273 /* Decrease module usage counts and free resource */
1220 IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1274 IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1221 vfree(oldinfo); 1275 vfree(oldinfo);
1222 /* Silent error: too late now. */
1223 if (copy_to_user(tmp.counters, counters, 1276 if (copy_to_user(tmp.counters, counters,
1224 sizeof(struct ip6t_counters) * tmp.num_counters) != 0) 1277 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
1225 ret = -EFAULT; 1278 ret = -EFAULT;
@@ -1229,7 +1282,6 @@ do_replace(void __user *user, unsigned int len)
1229 1282
1230 put_module: 1283 put_module:
1231 module_put(t->me); 1284 module_put(t->me);
1232 free_newinfo_counters_untrans_unlock:
1233 up(&ip6t_mutex); 1285 up(&ip6t_mutex);
1234 free_newinfo_counters_untrans: 1286 free_newinfo_counters_untrans:
1235 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 1287 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
@@ -1268,7 +1320,7 @@ do_add_counters(void __user *user, unsigned int len)
1268 unsigned int i; 1320 unsigned int i;
1269 struct ip6t_counters_info tmp, *paddc; 1321 struct ip6t_counters_info tmp, *paddc;
1270 struct ip6t_table *t; 1322 struct ip6t_table *t;
1271 int ret; 1323 int ret = 0;
1272 1324
1273 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1325 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1274 return -EFAULT; 1326 return -EFAULT;
@@ -1285,9 +1337,11 @@ do_add_counters(void __user *user, unsigned int len)
1285 goto free; 1337 goto free;
1286 } 1338 }
1287 1339
1288 t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex); 1340 t = find_table_lock(tmp.name);
1289 if (!t) 1341 if (!t || IS_ERR(t)) {
1342 ret = t ? PTR_ERR(t) : -ENOENT;
1290 goto free; 1343 goto free;
1344 }
1291 1345
1292 write_lock_bh(&t->lock); 1346 write_lock_bh(&t->lock);
1293 if (t->private->number != paddc->num_counters) { 1347 if (t->private->number != paddc->num_counters) {
@@ -1304,6 +1358,7 @@ do_add_counters(void __user *user, unsigned int len)
1304 unlock_up_free: 1358 unlock_up_free:
1305 write_unlock_bh(&t->lock); 1359 write_unlock_bh(&t->lock);
1306 up(&ip6t_mutex); 1360 up(&ip6t_mutex);
1361 module_put(t->me);
1307 free: 1362 free:
1308 vfree(paddc); 1363 vfree(paddc);
1309 1364
@@ -1360,8 +1415,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1360 break; 1415 break;
1361 } 1416 }
1362 name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; 1417 name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
1363 t = ip6t_find_table_lock(name, &ret, &ip6t_mutex); 1418
1364 if (t) { 1419 t = try_then_request_module(find_table_lock(name),
1420 "ip6table_%s", name);
1421 if (t && !IS_ERR(t)) {
1365 struct ip6t_getinfo info; 1422 struct ip6t_getinfo info;
1366 1423
1367 info.valid_hooks = t->valid_hooks; 1424 info.valid_hooks = t->valid_hooks;
@@ -1377,9 +1434,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1377 ret = -EFAULT; 1434 ret = -EFAULT;
1378 else 1435 else
1379 ret = 0; 1436 ret = 0;
1380
1381 up(&ip6t_mutex); 1437 up(&ip6t_mutex);
1382 } 1438 module_put(t->me);
1439 } else
1440 ret = t ? PTR_ERR(t) : -ENOENT;
1383 } 1441 }
1384 break; 1442 break;
1385 1443
@@ -1400,6 +1458,31 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1400 break; 1458 break;
1401 } 1459 }
1402 1460
1461 case IP6T_SO_GET_REVISION_MATCH:
1462 case IP6T_SO_GET_REVISION_TARGET: {
1463 struct ip6t_get_revision rev;
1464 int (*revfn)(const char *, u8, int *);
1465
1466 if (*len != sizeof(rev)) {
1467 ret = -EINVAL;
1468 break;
1469 }
1470 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1471 ret = -EFAULT;
1472 break;
1473 }
1474
1475 if (cmd == IP6T_SO_GET_REVISION_TARGET)
1476 revfn = target_revfn;
1477 else
1478 revfn = match_revfn;
1479
1480 try_then_request_module(find_revision(rev.name, rev.revision,
1481 revfn, &ret),
1482 "ip6t_%s", rev.name);
1483 break;
1484 }
1485
1403 default: 1486 default:
1404 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd); 1487 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1405 ret = -EINVAL; 1488 ret = -EINVAL;
@@ -1417,12 +1500,7 @@ ip6t_register_target(struct ip6t_target *target)
1417 ret = down_interruptible(&ip6t_mutex); 1500 ret = down_interruptible(&ip6t_mutex);
1418 if (ret != 0) 1501 if (ret != 0)
1419 return ret; 1502 return ret;
1420 1503 list_add(&target->list, &ip6t_target);
1421 if (!list_named_insert(&ip6t_target, target)) {
1422 duprintf("ip6t_register_target: `%s' already in list!\n",
1423 target->name);
1424 ret = -EINVAL;
1425 }
1426 up(&ip6t_mutex); 1504 up(&ip6t_mutex);
1427 return ret; 1505 return ret;
1428} 1506}
@@ -1444,11 +1522,7 @@ ip6t_register_match(struct ip6t_match *match)
1444 if (ret != 0) 1522 if (ret != 0)
1445 return ret; 1523 return ret;
1446 1524
1447 if (!list_named_insert(&ip6t_match, match)) { 1525 list_add(&match->list, &ip6t_match);
1448 duprintf("ip6t_register_match: `%s' already in list!\n",
1449 match->name);
1450 ret = -EINVAL;
1451 }
1452 up(&ip6t_mutex); 1526 up(&ip6t_mutex);
1453 1527
1454 return ret; 1528 return ret;
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
index 81924fcc5857..eab8fb864ee0 100644
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -56,8 +56,12 @@ checkentry(const char *tablename,
56 return 1; 56 return 1;
57} 57}
58 58
59static struct ip6t_target ip6t_mark_reg 59static struct ip6t_target ip6t_mark_reg = {
60= { { NULL, NULL }, "MARK", target, checkentry, NULL, THIS_MODULE }; 60 .name = "MARK",
61 .target = target,
62 .checkentry = checkentry,
63 .me = THIS_MODULE
64};
61 65
62static int __init init(void) 66static int __init init(void)
63{ 67{
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
new file mode 100644
index 000000000000..753a3ae8502b
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -0,0 +1,556 @@
1/*
2 * Copyright (C)2004 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - support Layer 3 protocol independent connection tracking.
13 * Based on the original ip_conntrack code which had the following
14 * copyright information:
15 * (C) 1999-2001 Paul `Rusty' Russell
16 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
17 *
18 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
19 * - add get_features() to support various size of conntrack
20 * structures.
21 */
22
23#include <linux/config.h>
24#include <linux/types.h>
25#include <linux/ipv6.h>
26#include <linux/in6.h>
27#include <linux/netfilter.h>
28#include <linux/module.h>
29#include <linux/skbuff.h>
30#include <linux/icmp.h>
31#include <linux/sysctl.h>
32#include <net/ipv6.h>
33
34#include <linux/netfilter_ipv6.h>
35#include <net/netfilter/nf_conntrack.h>
36#include <net/netfilter/nf_conntrack_helper.h>
37#include <net/netfilter/nf_conntrack_protocol.h>
38#include <net/netfilter/nf_conntrack_l3proto.h>
39#include <net/netfilter/nf_conntrack_core.h>
40
41#if 0
42#define DEBUGP printk
43#else
44#define DEBUGP(format, args...)
45#endif
46
47DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
48
49static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
50 struct nf_conntrack_tuple *tuple)
51{
52 u_int32_t _addrs[8], *ap;
53
54 ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
55 sizeof(_addrs), _addrs);
56 if (ap == NULL)
57 return 0;
58
59 memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
60 memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
61
62 return 1;
63}
64
65static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
66 const struct nf_conntrack_tuple *orig)
67{
68 memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
69 memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
70
71 return 1;
72}
73
74static int ipv6_print_tuple(struct seq_file *s,
75 const struct nf_conntrack_tuple *tuple)
76{
77 return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ",
78 NIP6(*((struct in6_addr *)tuple->src.u3.ip6)),
79 NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
80}
81
/*
 * Layer-3 private part of a conntrack entry: this implementation prints
 * nothing and always returns 0.
 */
static int ipv6_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	return 0;
}
87
88/*
89 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
90 *
91 * This function parses (probably truncated) exthdr set "hdr"
92 * of length "len". "nexthdrp" initially points to some place,
93 * where type of the first header can be found.
94 *
95 * It skips all well-known exthdrs, and returns pointer to the start
96 * of unparsable area i.e. the first header with unknown type.
97 * if success, *nexthdr is updated by type/protocol of this header.
98 *
99 * NOTES: - it may return pointer pointing beyond end of packet,
100 * if the last recognized header is truncated in the middle.
101 * - if packet is truncated, so that all parsed headers are skipped,
102 * it returns -1.
103 * - if packet is fragmented, return pointer of the fragment header.
104 * - ESP is unparsable for now and considered like
105 * normal payload protocol.
106 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
107 */
108
109int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp,
110 int len)
111{
112 u8 nexthdr = *nexthdrp;
113
114 while (ipv6_ext_hdr(nexthdr)) {
115 struct ipv6_opt_hdr hdr;
116 int hdrlen;
117
118 if (len < (int)sizeof(struct ipv6_opt_hdr))
119 return -1;
120 if (nexthdr == NEXTHDR_NONE)
121 break;
122 if (nexthdr == NEXTHDR_FRAGMENT)
123 break;
124 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
125 BUG();
126 if (nexthdr == NEXTHDR_AUTH)
127 hdrlen = (hdr.hdrlen+2)<<2;
128 else
129 hdrlen = ipv6_optlen(&hdr);
130
131 nexthdr = hdr.nexthdr;
132 len -= hdrlen;
133 start += hdrlen;
134 }
135
136 *nexthdrp = nexthdr;
137 return start;
138}
139
140static int
141ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
142 u_int8_t *protonum)
143{
144 unsigned int extoff;
145 unsigned char pnum;
146 int protoff;
147
148 extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
149 pnum = (*pskb)->nh.ipv6h->nexthdr;
150
151 protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
152 (*pskb)->len - extoff);
153
154 /*
155 * (protoff == (*pskb)->len) mean that the packet doesn't have no data
156 * except of IPv6 & ext headers. but it's tracked anyway. - YK
157 */
158 if ((protoff < 0) || (protoff > (*pskb)->len)) {
159 DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
160 NF_CT_STAT_INC(error);
161 NF_CT_STAT_INC(invalid);
162 return -NF_ACCEPT;
163 }
164
165 *dataoff = protoff;
166 *protonum = pnum;
167 return NF_ACCEPT;
168}
169
170static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple)
171{
172 return NF_CT_F_BASIC;
173}
174
175static unsigned int ipv6_confirm(unsigned int hooknum,
176 struct sk_buff **pskb,
177 const struct net_device *in,
178 const struct net_device *out,
179 int (*okfn)(struct sk_buff *))
180{
181 struct nf_conn *ct;
182 enum ip_conntrack_info ctinfo;
183
184 /* This is where we call the helper: as the packet goes out. */
185 ct = nf_ct_get(*pskb, &ctinfo);
186 if (ct && ct->helper) {
187 unsigned int ret, protoff;
188 unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
189 - (*pskb)->data;
190 unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
191
192 protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
193 (*pskb)->len - extoff);
194 if (protoff < 0 || protoff > (*pskb)->len ||
195 pnum == NEXTHDR_FRAGMENT) {
196 DEBUGP("proto header not found\n");
197 return NF_ACCEPT;
198 }
199
200 ret = ct->helper->help(pskb, protoff, ct, ctinfo);
201 if (ret != NF_ACCEPT)
202 return ret;
203 }
204
205 /* We've seen it coming out the other side: confirm it */
206
207 return nf_conntrack_confirm(pskb);
208}
209
210extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb);
211extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
212 struct net_device *in,
213 struct net_device *out,
214 int (*okfn)(struct sk_buff *));
215static unsigned int ipv6_defrag(unsigned int hooknum,
216 struct sk_buff **pskb,
217 const struct net_device *in,
218 const struct net_device *out,
219 int (*okfn)(struct sk_buff *))
220{
221 struct sk_buff *reasm;
222
223 /* Previously seen (loopback)? */
224 if ((*pskb)->nfct)
225 return NF_ACCEPT;
226
227 reasm = nf_ct_frag6_gather(*pskb);
228
229 /* queued */
230 if (reasm == NULL)
231 return NF_STOLEN;
232
233 /* error occured or not fragmented */
234 if (reasm == *pskb)
235 return NF_ACCEPT;
236
237 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
238 (struct net_device *)out, okfn);
239
240 return NF_STOLEN;
241}
242
243static unsigned int ipv6_conntrack_in(unsigned int hooknum,
244 struct sk_buff **pskb,
245 const struct net_device *in,
246 const struct net_device *out,
247 int (*okfn)(struct sk_buff *))
248{
249 struct sk_buff *reasm = (*pskb)->nfct_reasm;
250
251 /* This packet is fragmented and has reassembled packet. */
252 if (reasm) {
253 /* Reassembled packet isn't parsed yet ? */
254 if (!reasm->nfct) {
255 unsigned int ret;
256
257 ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
258 if (ret != NF_ACCEPT)
259 return ret;
260 }
261 nf_conntrack_get(reasm->nfct);
262 (*pskb)->nfct = reasm->nfct;
263 return NF_ACCEPT;
264 }
265
266 return nf_conntrack_in(PF_INET6, hooknum, pskb);
267}
268
269static unsigned int ipv6_conntrack_local(unsigned int hooknum,
270 struct sk_buff **pskb,
271 const struct net_device *in,
272 const struct net_device *out,
273 int (*okfn)(struct sk_buff *))
274{
275 /* root is playing with raw sockets. */
276 if ((*pskb)->len < sizeof(struct ipv6hdr)) {
277 if (net_ratelimit())
278 printk("ipv6_conntrack_local: packet too short\n");
279 return NF_ACCEPT;
280 }
281 return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
282}
283
284/* Connection tracking may drop packets, but never alters them, so
285 make it the first hook. */
286static struct nf_hook_ops ipv6_conntrack_defrag_ops = {
287 .hook = ipv6_defrag,
288 .owner = THIS_MODULE,
289 .pf = PF_INET6,
290 .hooknum = NF_IP6_PRE_ROUTING,
291 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
292};
293
294static struct nf_hook_ops ipv6_conntrack_in_ops = {
295 .hook = ipv6_conntrack_in,
296 .owner = THIS_MODULE,
297 .pf = PF_INET6,
298 .hooknum = NF_IP6_PRE_ROUTING,
299 .priority = NF_IP6_PRI_CONNTRACK,
300};
301
302static struct nf_hook_ops ipv6_conntrack_local_out_ops = {
303 .hook = ipv6_conntrack_local,
304 .owner = THIS_MODULE,
305 .pf = PF_INET6,
306 .hooknum = NF_IP6_LOCAL_OUT,
307 .priority = NF_IP6_PRI_CONNTRACK,
308};
309
310static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = {
311 .hook = ipv6_defrag,
312 .owner = THIS_MODULE,
313 .pf = PF_INET6,
314 .hooknum = NF_IP6_LOCAL_OUT,
315 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
316};
317
318/* Refragmenter; last chance. */
319static struct nf_hook_ops ipv6_conntrack_out_ops = {
320 .hook = ipv6_confirm,
321 .owner = THIS_MODULE,
322 .pf = PF_INET6,
323 .hooknum = NF_IP6_POST_ROUTING,
324 .priority = NF_IP6_PRI_LAST,
325};
326
327static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
328 .hook = ipv6_confirm,
329 .owner = THIS_MODULE,
330 .pf = PF_INET6,
331 .hooknum = NF_IP6_LOCAL_IN,
332 .priority = NF_IP6_PRI_LAST-1,
333};
334
#ifdef CONFIG_SYSCTL

/* From nf_conntrack_proto_icmpv6.c */
extern unsigned long nf_ct_icmpv6_timeout;

/* From nf_conntrack_reasm.c */
extern unsigned long nf_ct_frag6_timeout;
extern unsigned int nf_ct_frag6_low_thresh;
extern unsigned int nf_ct_frag6_high_thresh;

static struct ctl_table_header *nf_ct_ipv6_sysctl_header;

/*
 * Leaf entries under net/netfilter.
 *
 * NOTE(review): the two timeout variables are declared unsigned long but
 * are exported with maxlen = sizeof(unsigned int) through the int-vector
 * jiffies handler; confirm this is intended on 64-bit targets.
 */
static ctl_table nf_ct_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_icmpv6_timeout",
		.ctl_name	= NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
		.mode		= 0644,
		.data		= &nf_ct_icmpv6_timeout,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_frag6_timeout",
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
		.mode		= 0644,
		.data		= &nf_ct_frag6_timeout,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_frag6_low_thresh",
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
		.mode		= 0644,
		.data		= &nf_ct_frag6_low_thresh,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "nf_conntrack_frag6_high_thresh",
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
		.mode		= 0644,
		.data		= &nf_ct_frag6_high_thresh,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

/* "netfilter" directory holding the entries above. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.procname	= "netfilter",
		.ctl_name	= NET_NETFILTER,
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Top-level "net" directory; this is the table that gets registered. */
static ctl_table nf_ct_net_table[] = {
	{
		.procname	= "net",
		.ctl_name	= CTL_NET,
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
403
404struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
405 .l3proto = PF_INET6,
406 .name = "ipv6",
407 .pkt_to_tuple = ipv6_pkt_to_tuple,
408 .invert_tuple = ipv6_invert_tuple,
409 .print_tuple = ipv6_print_tuple,
410 .print_conntrack = ipv6_print_conntrack,
411 .prepare = ipv6_prepare,
412 .get_features = ipv6_get_features,
413 .me = THIS_MODULE,
414};
415
416extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
417extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
418extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
419extern int nf_ct_frag6_init(void);
420extern void nf_ct_frag6_cleanup(void);
421static int init_or_cleanup(int init)
422{
423 int ret = 0;
424
425 if (!init) goto cleanup;
426
427 ret = nf_ct_frag6_init();
428 if (ret < 0) {
429 printk("nf_conntrack_ipv6: can't initialize frag6.\n");
430 goto cleanup_nothing;
431 }
432 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
433 if (ret < 0) {
434 printk("nf_conntrack_ipv6: can't register tcp.\n");
435 goto cleanup_frag6;
436 }
437
438 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
439 if (ret < 0) {
440 printk("nf_conntrack_ipv6: can't register udp.\n");
441 goto cleanup_tcp;
442 }
443
444 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
445 if (ret < 0) {
446 printk("nf_conntrack_ipv6: can't register icmpv6.\n");
447 goto cleanup_udp;
448 }
449
450 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
451 if (ret < 0) {
452 printk("nf_conntrack_ipv6: can't register ipv6\n");
453 goto cleanup_icmpv6;
454 }
455
456 ret = nf_register_hook(&ipv6_conntrack_defrag_ops);
457 if (ret < 0) {
458 printk("nf_conntrack_ipv6: can't register pre-routing defrag "
459 "hook.\n");
460 goto cleanup_ipv6;
461 }
462
463 ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops);
464 if (ret < 0) {
465 printk("nf_conntrack_ipv6: can't register local_out defrag "
466 "hook.\n");
467 goto cleanup_defragops;
468 }
469
470 ret = nf_register_hook(&ipv6_conntrack_in_ops);
471 if (ret < 0) {
472 printk("nf_conntrack_ipv6: can't register pre-routing hook.\n");
473 goto cleanup_defraglocalops;
474 }
475
476 ret = nf_register_hook(&ipv6_conntrack_local_out_ops);
477 if (ret < 0) {
478 printk("nf_conntrack_ipv6: can't register local out hook.\n");
479 goto cleanup_inops;
480 }
481
482 ret = nf_register_hook(&ipv6_conntrack_out_ops);
483 if (ret < 0) {
484 printk("nf_conntrack_ipv6: can't register post-routing hook.\n");
485 goto cleanup_inandlocalops;
486 }
487
488 ret = nf_register_hook(&ipv6_conntrack_local_in_ops);
489 if (ret < 0) {
490 printk("nf_conntrack_ipv6: can't register local in hook.\n");
491 goto cleanup_inoutandlocalops;
492 }
493
494#ifdef CONFIG_SYSCTL
495 nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
496 if (nf_ct_ipv6_sysctl_header == NULL) {
497 printk("nf_conntrack: can't register to sysctl.\n");
498 ret = -ENOMEM;
499 goto cleanup_localinops;
500 }
501#endif
502 return ret;
503
504 cleanup:
505 synchronize_net();
506#ifdef CONFIG_SYSCTL
507 unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
508 cleanup_localinops:
509#endif
510 nf_unregister_hook(&ipv6_conntrack_local_in_ops);
511 cleanup_inoutandlocalops:
512 nf_unregister_hook(&ipv6_conntrack_out_ops);
513 cleanup_inandlocalops:
514 nf_unregister_hook(&ipv6_conntrack_local_out_ops);
515 cleanup_inops:
516 nf_unregister_hook(&ipv6_conntrack_in_ops);
517 cleanup_defraglocalops:
518 nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops);
519 cleanup_defragops:
520 nf_unregister_hook(&ipv6_conntrack_defrag_ops);
521 cleanup_ipv6:
522 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
523 cleanup_icmpv6:
524 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
525 cleanup_udp:
526 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
527 cleanup_tcp:
528 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
529 cleanup_frag6:
530 nf_ct_frag6_cleanup();
531 cleanup_nothing:
532 return ret;
533}
534
535MODULE_LICENSE("GPL");
536MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
537
538static int __init init(void)
539{
540 need_nf_conntrack();
541 return init_or_cleanup(1);
542}
543
544static void __exit fini(void)
545{
546 init_or_cleanup(0);
547}
548
549module_init(init);
550module_exit(fini);
551
/*
 * Intentionally empty exported function.  Presumably other modules call
 * it only to create a symbol dependency on this module -- confirm against
 * callers.
 */
void need_ip6_conntrack(void)
{
	return;
}

EXPORT_SYMBOL(need_ip6_conntrack);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000000..c0f1da5497a9
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,272 @@
1/*
2 * Copyright (C)2003,2004 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - ICMPv6 tracking support. Derived from the original ip_conntrack code
13 * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
14 * copyright information:
15 * (C) 1999-2001 Paul `Rusty' Russell
16 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
17 */
18
19#include <linux/types.h>
20#include <linux/sched.h>
21#include <linux/timer.h>
22#include <linux/module.h>
23#include <linux/netfilter.h>
24#include <linux/in6.h>
25#include <linux/icmpv6.h>
26#include <linux/ipv6.h>
27#include <net/ipv6.h>
28#include <net/ip6_checksum.h>
29#include <linux/seq_file.h>
30#include <linux/netfilter_ipv6.h>
31#include <net/netfilter/nf_conntrack_tuple.h>
32#include <net/netfilter/nf_conntrack_protocol.h>
33#include <net/netfilter/nf_conntrack_core.h>
34#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
35
36unsigned long nf_ct_icmpv6_timeout = 30*HZ;
37
38#if 0
39#define DEBUGP printk
40#else
41#define DEBUGP(format, args...)
42#endif
43
44static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
45 unsigned int dataoff,
46 struct nf_conntrack_tuple *tuple)
47{
48 struct icmp6hdr _hdr, *hp;
49
50 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
51 if (hp == NULL)
52 return 0;
53 tuple->dst.u.icmp.type = hp->icmp6_type;
54 tuple->src.u.icmp.id = hp->icmp6_identifier;
55 tuple->dst.u.icmp.code = hp->icmp6_code;
56
57 return 1;
58}
59
60static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
61 const struct nf_conntrack_tuple *orig)
62{
63 /* Add 1; spaces filled with 0. */
64 static u_int8_t invmap[] = {
65 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
66 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
67 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
68 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
69 };
70
71 __u8 type = orig->dst.u.icmp.type - 128;
72 if (type >= sizeof(invmap) || !invmap[type])
73 return 0;
74
75 tuple->src.u.icmp.id = orig->src.u.icmp.id;
76 tuple->dst.u.icmp.type = invmap[type] - 1;
77 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
78 return 1;
79}
80
81/* Print out the per-protocol part of the tuple. */
82static int icmpv6_print_tuple(struct seq_file *s,
83 const struct nf_conntrack_tuple *tuple)
84{
85 return seq_printf(s, "type=%u code=%u id=%u ",
86 tuple->dst.u.icmp.type,
87 tuple->dst.u.icmp.code,
88 ntohs(tuple->src.u.icmp.id));
89}
90
/*
 * Print the private part of the conntrack: nothing is printed for
 * ICMPv6, and 0 is always returned.
 */
static int icmpv6_print_conntrack(struct seq_file *s,
				  const struct nf_conn *conntrack)
{
	return 0;
}
97
98/* Returns verdict for packet, or -1 for invalid. */
99static int icmpv6_packet(struct nf_conn *ct,
100 const struct sk_buff *skb,
101 unsigned int dataoff,
102 enum ip_conntrack_info ctinfo,
103 int pf,
104 unsigned int hooknum)
105{
106 /* Try to delete connection immediately after all replies:
107 won't actually vanish as we still have skb, and del_timer
108 means this will only run once even if count hits zero twice
109 (theoretically possible with SMP) */
110 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
111 if (atomic_dec_and_test(&ct->proto.icmp.count)
112 && del_timer(&ct->timeout))
113 ct->timeout.function((unsigned long)ct);
114 } else {
115 atomic_inc(&ct->proto.icmp.count);
116 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
117 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
118 }
119
120 return NF_ACCEPT;
121}
122
123/* Called when a new connection for this protocol found. */
124static int icmpv6_new(struct nf_conn *conntrack,
125 const struct sk_buff *skb,
126 unsigned int dataoff)
127{
128 static u_int8_t valid_new[] = {
129 [ICMPV6_ECHO_REQUEST - 128] = 1,
130 [ICMPV6_NI_QUERY - 128] = 1
131 };
132
133 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
134 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
135 /* Can't create a new ICMPv6 `conn' with this. */
136 DEBUGP("icmp: can't create new conn with type %u\n",
137 conntrack->tuplehash[0].tuple.dst.u.icmp.type);
138 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
139 return 0;
140 }
141 atomic_set(&conntrack->proto.icmp.count, 0);
142 return 1;
143}
144
145extern int
146nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len);
147extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
148static int
149icmpv6_error_message(struct sk_buff *skb,
150 unsigned int icmp6off,
151 enum ip_conntrack_info *ctinfo,
152 unsigned int hooknum)
153{
154 struct nf_conntrack_tuple intuple, origtuple;
155 struct nf_conntrack_tuple_hash *h;
156 struct icmp6hdr _hdr, *hp;
157 unsigned int inip6off;
158 struct nf_conntrack_protocol *inproto;
159 u_int8_t inprotonum;
160 unsigned int inprotoff;
161
162 NF_CT_ASSERT(skb->nfct == NULL);
163
164 hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr);
165 if (hp == NULL) {
166 DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n");
167 return -NF_ACCEPT;
168 }
169
170 inip6off = icmp6off + sizeof(_hdr);
171 if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr),
172 &inprotonum, sizeof(inprotonum)) != 0) {
173 DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n");
174 return -NF_ACCEPT;
175 }
176 inprotoff = nf_ct_ipv6_skip_exthdr(skb,
177 inip6off + sizeof(struct ipv6hdr),
178 &inprotonum,
179 skb->len - inip6off
180 - sizeof(struct ipv6hdr));
181
182 if ((inprotoff < 0) || (inprotoff > skb->len) ||
183 (inprotonum == NEXTHDR_FRAGMENT)) {
184 DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n");
185 return -NF_ACCEPT;
186 }
187
188 inproto = nf_ct_find_proto(PF_INET6, inprotonum);
189
190 /* Are they talking about one of our connections? */
191 if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
192 &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) {
193 DEBUGP("icmpv6_error: Can't get tuple\n");
194 return -NF_ACCEPT;
195 }
196
197 /* Ordinarily, we'd expect the inverted tupleproto, but it's
198 been preserved inside the ICMP. */
199 if (!nf_ct_invert_tuple(&intuple, &origtuple,
200 &nf_conntrack_l3proto_ipv6, inproto)) {
201 DEBUGP("icmpv6_error: Can't invert tuple\n");
202 return -NF_ACCEPT;
203 }
204
205 *ctinfo = IP_CT_RELATED;
206
207 h = nf_conntrack_find_get(&intuple, NULL);
208 if (!h) {
209 DEBUGP("icmpv6_error: no match\n");
210 return -NF_ACCEPT;
211 } else {
212 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
213 *ctinfo += IP_CT_IS_REPLY;
214 }
215
216 /* Update skb to refer to this connection */
217 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
218 skb->nfctinfo = *ctinfo;
219 return -NF_ACCEPT;
220}
221
222static int
223icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
224 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
225{
226 struct icmp6hdr _ih, *icmp6h;
227
228 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
229 if (icmp6h == NULL) {
230 if (LOG_INVALID(IPPROTO_ICMPV6))
231 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
232 "nf_ct_icmpv6: short packet ");
233 return -NF_ACCEPT;
234 }
235
236 if (hooknum != NF_IP6_PRE_ROUTING)
237 goto skipped;
238
239 /* Ignore it if the checksum's bogus. */
240 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
241 skb->len - dataoff, IPPROTO_ICMPV6,
242 skb_checksum(skb, dataoff,
243 skb->len - dataoff, 0))) {
244 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
245 "nf_ct_icmpv6: ICMPv6 checksum failed\n");
246 return -NF_ACCEPT;
247 }
248
249skipped:
250
251 /* is not error message ? */
252 if (icmp6h->icmp6_type >= 128)
253 return NF_ACCEPT;
254
255 return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
256}
257
258struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
259{
260 .l3proto = PF_INET6,
261 .proto = IPPROTO_ICMPV6,
262 .name = "icmpv6",
263 .pkt_to_tuple = icmpv6_pkt_to_tuple,
264 .invert_tuple = icmpv6_invert_tuple,
265 .print_tuple = icmpv6_print_tuple,
266 .print_conntrack = icmpv6_print_conntrack,
267 .packet = icmpv6_packet,
268 .new = icmpv6_new,
269 .error = icmpv6_error,
270};
271
272EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
new file mode 100644
index 000000000000..c2c52af9e560
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -0,0 +1,897 @@
1/*
2 * IPv6 fragment reassembly for connection tracking
3 *
4 * Copyright (C)2004 USAGI/WIDE Project
5 *
6 * Author:
7 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
8 *
9 * Based on: net/ipv6/reassembly.c
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <linux/config.h>
18#include <linux/errno.h>
19#include <linux/types.h>
20#include <linux/string.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/jiffies.h>
24#include <linux/net.h>
25#include <linux/list.h>
26#include <linux/netdevice.h>
27#include <linux/in6.h>
28#include <linux/ipv6.h>
29#include <linux/icmpv6.h>
30#include <linux/random.h>
31#include <linux/jhash.h>
32
33#include <net/sock.h>
34#include <net/snmp.h>
35
36#include <net/ipv6.h>
37#include <net/protocol.h>
38#include <net/transp_v6.h>
39#include <net/rawv6.h>
40#include <net/ndisc.h>
41#include <net/addrconf.h>
42#include <linux/sysctl.h>
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45#include <linux/kernel.h>
46#include <linux/module.h>
47
48#if 0
49#define DEBUGP printk
50#else
51#define DEBUGP(format, args...)
52#endif
53
54#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
55#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
56#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
57
58unsigned int nf_ct_frag6_high_thresh = 256*1024;
59unsigned int nf_ct_frag6_low_thresh = 192*1024;
60unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
61
62struct nf_ct_frag6_skb_cb
63{
64 struct inet6_skb_parm h;
65 int offset;
66 struct sk_buff *orig;
67};
68
69#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
70
71struct nf_ct_frag6_queue
72{
73 struct nf_ct_frag6_queue *next;
74 struct list_head lru_list; /* lru list member */
75
76 __u32 id; /* fragment id */
77 struct in6_addr saddr;
78 struct in6_addr daddr;
79
80 spinlock_t lock;
81 atomic_t refcnt;
82 struct timer_list timer; /* expire timer */
83 struct sk_buff *fragments;
84 int len;
85 int meat;
86 struct timeval stamp;
87 unsigned int csum;
88 __u8 last_in; /* has first/last segment arrived? */
89#define COMPLETE 4
90#define FIRST_IN 2
91#define LAST_IN 1
92 __u16 nhoffset;
93 struct nf_ct_frag6_queue **pprev;
94};
95
96/* Hash table. */
97
98#define FRAG6Q_HASHSZ 64
99
100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
101static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
102static u32 nf_ct_frag6_hash_rnd;
103static LIST_HEAD(nf_ct_frag6_lru_list);
104int nf_ct_frag6_nqueues = 0;
105
106static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
107{
108 if (fq->next)
109 fq->next->pprev = fq->pprev;
110 *fq->pprev = fq->next;
111 list_del(&fq->lru_list);
112 nf_ct_frag6_nqueues--;
113}
114
115static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
116{
117 write_lock(&nf_ct_frag6_lock);
118 __fq_unlink(fq);
119 write_unlock(&nf_ct_frag6_lock);
120}
121
122static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
123 struct in6_addr *daddr)
124{
125 u32 a, b, c;
126
127 a = saddr->s6_addr32[0];
128 b = saddr->s6_addr32[1];
129 c = saddr->s6_addr32[2];
130
131 a += JHASH_GOLDEN_RATIO;
132 b += JHASH_GOLDEN_RATIO;
133 c += nf_ct_frag6_hash_rnd;
134 __jhash_mix(a, b, c);
135
136 a += saddr->s6_addr32[3];
137 b += daddr->s6_addr32[0];
138 c += daddr->s6_addr32[1];
139 __jhash_mix(a, b, c);
140
141 a += daddr->s6_addr32[2];
142 b += daddr->s6_addr32[3];
143 c += id;
144 __jhash_mix(a, b, c);
145
146 return c & (FRAG6Q_HASHSZ - 1);
147}
148
149static struct timer_list nf_ct_frag6_secret_timer;
150int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
151
152static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
153{
154 unsigned long now = jiffies;
155 int i;
156
157 write_lock(&nf_ct_frag6_lock);
158 get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
159 for (i = 0; i < FRAG6Q_HASHSZ; i++) {
160 struct nf_ct_frag6_queue *q;
161
162 q = nf_ct_frag6_hash[i];
163 while (q) {
164 struct nf_ct_frag6_queue *next = q->next;
165 unsigned int hval = ip6qhashfn(q->id,
166 &q->saddr,
167 &q->daddr);
168
169 if (hval != i) {
170 /* Unlink. */
171 if (q->next)
172 q->next->pprev = q->pprev;
173 *q->pprev = q->next;
174
175 /* Relink to new hash chain. */
176 if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
177 q->next->pprev = &q->next;
178 nf_ct_frag6_hash[hval] = q;
179 q->pprev = &nf_ct_frag6_hash[hval];
180 }
181
182 q = next;
183 }
184 }
185 write_unlock(&nf_ct_frag6_lock);
186
187 mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
188}
189
190atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
191
192/* Memory Tracking Functions. */
193static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
194{
195 if (work)
196 *work -= skb->truesize;
197 atomic_sub(skb->truesize, &nf_ct_frag6_mem);
198 if (NFCT_FRAG6_CB(skb)->orig)
199 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
200
201 kfree_skb(skb);
202}
203
204static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
205 unsigned int *work)
206{
207 if (work)
208 *work -= sizeof(struct nf_ct_frag6_queue);
209 atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
210 kfree(fq);
211}
212
213static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
214{
215 struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
216
217 if (!fq)
218 return NULL;
219 atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
220 return fq;
221}
222
223/* Destruction primitives. */
224
225/* Complete destruction of fq. */
226static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
227 unsigned int *work)
228{
229 struct sk_buff *fp;
230
231 BUG_TRAP(fq->last_in&COMPLETE);
232 BUG_TRAP(del_timer(&fq->timer) == 0);
233
234 /* Release all fragment data. */
235 fp = fq->fragments;
236 while (fp) {
237 struct sk_buff *xp = fp->next;
238
239 frag_kfree_skb(fp, work);
240 fp = xp;
241 }
242
243 frag_free_queue(fq, work);
244}
245
246static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
247{
248 if (atomic_dec_and_test(&fq->refcnt))
249 nf_ct_frag6_destroy(fq, work);
250}
251
252/* Kill fq entry. It is not destroyed immediately,
253 * because caller (and someone more) holds reference count.
254 */
255static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
256{
257 if (del_timer(&fq->timer))
258 atomic_dec(&fq->refcnt);
259
260 if (!(fq->last_in & COMPLETE)) {
261 fq_unlink(fq);
262 atomic_dec(&fq->refcnt);
263 fq->last_in |= COMPLETE;
264 }
265}
266
267static void nf_ct_frag6_evictor(void)
268{
269 struct nf_ct_frag6_queue *fq;
270 struct list_head *tmp;
271 unsigned int work;
272
273 work = atomic_read(&nf_ct_frag6_mem);
274 if (work <= nf_ct_frag6_low_thresh)
275 return;
276
277 work -= nf_ct_frag6_low_thresh;
278 while (work > 0) {
279 read_lock(&nf_ct_frag6_lock);
280 if (list_empty(&nf_ct_frag6_lru_list)) {
281 read_unlock(&nf_ct_frag6_lock);
282 return;
283 }
284 tmp = nf_ct_frag6_lru_list.next;
285 BUG_ON(tmp == NULL);
286 fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
287 atomic_inc(&fq->refcnt);
288 read_unlock(&nf_ct_frag6_lock);
289
290 spin_lock(&fq->lock);
291 if (!(fq->last_in&COMPLETE))
292 fq_kill(fq);
293 spin_unlock(&fq->lock);
294
295 fq_put(fq, &work);
296 }
297}
298
299static void nf_ct_frag6_expire(unsigned long data)
300{
301 struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
302
303 spin_lock(&fq->lock);
304
305 if (fq->last_in & COMPLETE)
306 goto out;
307
308 fq_kill(fq);
309
310out:
311 spin_unlock(&fq->lock);
312 fq_put(fq, NULL);
313}
314
315/* Creation primitives. */
316
317
318static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
319 struct nf_ct_frag6_queue *fq_in)
320{
321 struct nf_ct_frag6_queue *fq;
322
323 write_lock(&nf_ct_frag6_lock);
324#ifdef CONFIG_SMP
325 for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
326 if (fq->id == fq_in->id &&
327 !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
328 !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
329 atomic_inc(&fq->refcnt);
330 write_unlock(&nf_ct_frag6_lock);
331 fq_in->last_in |= COMPLETE;
332 fq_put(fq_in, NULL);
333 return fq;
334 }
335 }
336#endif
337 fq = fq_in;
338
339 if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
340 atomic_inc(&fq->refcnt);
341
342 atomic_inc(&fq->refcnt);
343 if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
344 fq->next->pprev = &fq->next;
345 nf_ct_frag6_hash[hash] = fq;
346 fq->pprev = &nf_ct_frag6_hash[hash];
347 INIT_LIST_HEAD(&fq->lru_list);
348 list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
349 nf_ct_frag6_nqueues++;
350 write_unlock(&nf_ct_frag6_lock);
351 return fq;
352}
353
354
355static struct nf_ct_frag6_queue *
356nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
357{
358 struct nf_ct_frag6_queue *fq;
359
360 if ((fq = frag_alloc_queue()) == NULL) {
361 DEBUGP("Can't alloc new queue\n");
362 goto oom;
363 }
364
365 memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
366
367 fq->id = id;
368 ipv6_addr_copy(&fq->saddr, src);
369 ipv6_addr_copy(&fq->daddr, dst);
370
371 init_timer(&fq->timer);
372 fq->timer.function = nf_ct_frag6_expire;
373 fq->timer.data = (long) fq;
374 fq->lock = SPIN_LOCK_UNLOCKED;
375 atomic_set(&fq->refcnt, 1);
376
377 return nf_ct_frag6_intern(hash, fq);
378
379oom:
380 return NULL;
381}
382
383static __inline__ struct nf_ct_frag6_queue *
384fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
385{
386 struct nf_ct_frag6_queue *fq;
387 unsigned int hash = ip6qhashfn(id, src, dst);
388
389 read_lock(&nf_ct_frag6_lock);
390 for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
391 if (fq->id == id &&
392 !ipv6_addr_cmp(src, &fq->saddr) &&
393 !ipv6_addr_cmp(dst, &fq->daddr)) {
394 atomic_inc(&fq->refcnt);
395 read_unlock(&nf_ct_frag6_lock);
396 return fq;
397 }
398 }
399 read_unlock(&nf_ct_frag6_lock);
400
401 return nf_ct_frag6_create(hash, id, src, dst);
402}
403
404
/*
 * Insert one fragment (skb) into the reassembly queue fq.
 *
 * fhdr points at the fragment header inside skb; nhoff is stored in
 * fq->nhoffset when this fragment has offset 0.  The caller in this file
 * holds fq->lock around the call.
 *
 * Returns 0 once the fragment has been linked into fq->fragments, and -1
 * when it is rejected.  Every rejection happens before the skb is linked,
 * so on -1 the skb is still owned by the caller.  Note that fq->len and
 * the LAST_IN flag may already have been updated from a fragment that is
 * rejected further down.
 */
405static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
406 struct frag_hdr *fhdr, int nhoff)
407{
408 struct sk_buff *prev, *next;
409 int offset, end;
410
411 if (fq->last_in & COMPLETE) {
412 DEBUGP("Allready completed\n");
413 goto err;
414 }
415
/* offset: start of this fragment's data (low three bits of frag_off
 * masked away).  end: offset plus the payload length that follows the
 * fragment header.
 */
416 offset = ntohs(fhdr->frag_off) & ~0x7;
417 end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
418 ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
419
420 if ((unsigned int)end > IPV6_MAXPLEN) {
421 DEBUGP("offset is too large.\n");
422 return -1;
423 }
424
/* Take the bytes in front of the fragment data (network header up to
 * and including the fragment header) out of a hardware checksum.
 */
425 if (skb->ip_summed == CHECKSUM_HW)
426 skb->csum = csum_sub(skb->csum,
427 csum_partial(skb->nh.raw,
428 (u8*)(fhdr + 1) - skb->nh.raw,
429 0));
430
431 /* Is this the final fragment? */
432 if (!(fhdr->frag_off & htons(IP6_MF))) {
433 /* If we already have some bits beyond end
434 * or have different end, the segment is corrupted.
435 */
436 if (end < fq->len ||
437 ((fq->last_in & LAST_IN) && end != fq->len)) {
438 DEBUGP("already received last fragment\n");
439 goto err;
440 }
441 fq->last_in |= LAST_IN;
442 fq->len = end;
443 } else {
444 /* Check if the fragment is rounded to 8 bytes.
445 * Required by the RFC.
446 */
447 if (end & 0x7) {
448 /* RFC2460 says always send parameter problem in
449 * this case. -DaveM
450 */
451 DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
452 return -1;
453 }
454 if (end > fq->len) {
455 /* Some bits beyond end -> corruption. */
456 if (fq->last_in & LAST_IN) {
457 DEBUGP("last packet already reached.\n");
458 goto err;
459 }
460 fq->len = end;
461 }
462 }
463
/* A fragment that carries no data is rejected. */
464 if (end == offset)
465 goto err;
466
467 /* Point into the IP datagram 'data' part. */
468 if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
469 DEBUGP("queue: message is too short.\n");
470 goto err;
471 }
/* Cut off anything beyond the announced fragment length; a checksum
 * that is not already CHECKSUM_UNNECESSARY is reset to CHECKSUM_NONE.
 */
472 if (end-offset < skb->len) {
473 if (pskb_trim(skb, end - offset)) {
474 DEBUGP("Can't trim\n");
475 goto err;
476 }
477 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
478 skb->ip_summed = CHECKSUM_NONE;
479 }
480
481 /* Find out which fragments are in front and at the back of us
482 * in the chain of fragments so far. We must know where to put
483 * this fragment, right?
484 */
485 prev = NULL;
486 for (next = fq->fragments; next != NULL; next = next->next) {
487 if (NFCT_FRAG6_CB(next)->offset >= offset)
488 break; /* bingo! */
489 prev = next;
490 }
491
492 /* We found where to put this one. Check for overlap with
493 * preceding fragment, and, if needed, align things so that
494 * any overlaps are eliminated.
495 */
496 if (prev) {
497 int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;
498
499 if (i > 0) {
500 offset += i;
501 if (end <= offset) {
502 DEBUGP("overlap\n");
503 goto err;
504 }
505 if (!pskb_pull(skb, i)) {
506 DEBUGP("Can't pull\n");
507 goto err;
508 }
509 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
510 skb->ip_summed = CHECKSUM_NONE;
511 }
512 }
513
514 /* Look for overlap with succeeding segments.
515 * If we can merge fragments, do it.
516 */
517 while (next && NFCT_FRAG6_CB(next)->offset < end) {
518 /* overlap is 'i' bytes */
519 int i = end - NFCT_FRAG6_CB(next)->offset;
520
521 if (i < next->len) {
522 /* Eat head of the next overlapped fragment
523 * and leave the loop. The next ones cannot overlap.
524 */
525 DEBUGP("Eat head of the overlapped parts.: %d", i);
526 if (!pskb_pull(next, i))
527 goto err;
528
529 /* next fragment */
530 NFCT_FRAG6_CB(next)->offset += i;
531 fq->meat -= i;
532 if (next->ip_summed != CHECKSUM_UNNECESSARY)
533 next->ip_summed = CHECKSUM_NONE;
534 break;
535 } else {
536 struct sk_buff *free_it = next;
537
538 /* Old fragment is completely overridden with
539 * new one drop it.
540 */
541 next = next->next;
542
543 if (prev)
544 prev->next = next;
545 else
546 fq->fragments = next;
547
548 fq->meat -= free_it->len;
549 frag_kfree_skb(free_it, NULL);
550 }
551 }
552
553 NFCT_FRAG6_CB(skb)->offset = offset;
554
555 /* Insert this fragment in the chain of fragments. */
556 skb->next = next;
557 if (prev)
558 prev->next = skb;
559 else
560 fq->fragments = skb;
561
/* Accounting: fq->meat counts the data bytes held by the queue, and
 * the skb's truesize is charged to the global memory counter.
 */
562 skb->dev = NULL;
563 skb_get_timestamp(skb, &fq->stamp);
564 fq->meat += skb->len;
565 atomic_add(skb->truesize, &nf_ct_frag6_mem);
566
567 /* The first fragment.
568 * nhoffset is obtained from the first fragment, of course.
569 */
570 if (offset == 0) {
571 fq->nhoffset = nhoff;
572 fq->last_in |= FIRST_IN;
573 }
/* The queue was just used: move it to the tail of the LRU list. */
574 write_lock(&nf_ct_frag6_lock);
575 list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
576 write_unlock(&nf_ct_frag6_lock);
577 return 0;
578
579err:
580 return -1;
581}
582
583/*
584 * Check if this packet is complete.
585 * Returns NULL on failure by any reason, and pointer
586 * to current nexthdr field in reassembled frame.
587 *
588 * It is called with locked fq, and caller must check that
589 * queue is eligible for reassembly i.e. it is not COMPLETE,
590 * the last and the first frames arrived and all the bits are here.
591 */
592static struct sk_buff *
593nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
594{
595 struct sk_buff *fp, *op, *head = fq->fragments;
596 int payload_len;
597
598 fq_kill(fq);
599
600 BUG_TRAP(head != NULL);
601 BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
602
603 /* Unfragmented part is taken from the first segment. */
604 payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
605 if (payload_len > IPV6_MAXPLEN) {
606 DEBUGP("payload len is too large.\n");
607 goto out_oversize;
608 }
609
610 /* Head of list must not be cloned. */
611 if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
612 DEBUGP("skb is cloned but can't expand head");
613 goto out_oom;
614 }
615
616 /* If the first fragment is fragmented itself, we split
617 * it to two chunks: the first with data and paged part
618 * and the second, holding only fragments. */
619 if (skb_shinfo(head)->frag_list) {
620 struct sk_buff *clone;
621 int i, plen = 0;
622
623 if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
624 DEBUGP("Can't alloc skb\n");
625 goto out_oom;
626 }
627 clone->next = head->next;
628 head->next = clone;
629 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
630 skb_shinfo(head)->frag_list = NULL;
631 for (i=0; i<skb_shinfo(head)->nr_frags; i++)
632 plen += skb_shinfo(head)->frags[i].size;
633 clone->len = clone->data_len = head->data_len - plen;
634 head->data_len -= clone->len;
635 head->len -= clone->len;
636 clone->csum = 0;
637 clone->ip_summed = head->ip_summed;
638
639 NFCT_FRAG6_CB(clone)->orig = NULL;
640 atomic_add(clone->truesize, &nf_ct_frag6_mem);
641 }
642
643 /* We have to remove fragment header from datagram and to relocate
644 * header in order to calculate ICV correctly. */
645 head->nh.raw[fq->nhoffset] = head->h.raw[0];
646 memmove(head->head + sizeof(struct frag_hdr), head->head,
647 (head->data - head->head) - sizeof(struct frag_hdr));
648 head->mac.raw += sizeof(struct frag_hdr);
649 head->nh.raw += sizeof(struct frag_hdr);
650
651 skb_shinfo(head)->frag_list = head->next;
652 head->h.raw = head->data;
653 skb_push(head, head->data - head->nh.raw);
654 atomic_sub(head->truesize, &nf_ct_frag6_mem);
655
656 for (fp=head->next; fp; fp = fp->next) {
657 head->data_len += fp->len;
658 head->len += fp->len;
659 if (head->ip_summed != fp->ip_summed)
660 head->ip_summed = CHECKSUM_NONE;
661 else if (head->ip_summed == CHECKSUM_HW)
662 head->csum = csum_add(head->csum, fp->csum);
663 head->truesize += fp->truesize;
664 atomic_sub(fp->truesize, &nf_ct_frag6_mem);
665 }
666
667 head->next = NULL;
668 head->dev = dev;
669 skb_set_timestamp(head, &fq->stamp);
670 head->nh.ipv6h->payload_len = htons(payload_len);
671
672 /* Yes, and fold redundant checksum back. 8) */
673 if (head->ip_summed == CHECKSUM_HW)
674 head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
675
676 fq->fragments = NULL;
677
678 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
679 fp = skb_shinfo(head)->frag_list;
680 if (NFCT_FRAG6_CB(fp)->orig == NULL)
681 /* at above code, head skb is divided into two skbs. */
682 fp = fp->next;
683
684 op = NFCT_FRAG6_CB(head)->orig;
685 for (; fp; fp = fp->next) {
686 struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
687
688 op->next = orig;
689 op = orig;
690 NFCT_FRAG6_CB(fp)->orig = NULL;
691 }
692
693 return head;
694
695out_oversize:
696 if (net_ratelimit())
697 printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
698 goto out_fail;
699out_oom:
700 if (net_ratelimit())
701 printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
702out_fail:
703 return NULL;
704}
705
706/*
707 * find the header just before Fragment Header.
708 *
709 * if success return 0 and set ...
710 * (*prevhdrp): the value of "Next Header Field" in the header
711 * just before Fragment Header.
712 * (*prevhoff): the offset of "Next Header Field" in the header
713 * just before Fragment Header.
714 * (*fhoff) : the offset of Fragment Header.
715 *
716 * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
717 *
718 */
719static int
720find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
721{
722 u8 nexthdr = skb->nh.ipv6h->nexthdr;
723 u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
724 int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
725 int len = skb->len - start;
726 u8 prevhdr = NEXTHDR_IPV6;
727
728 while (nexthdr != NEXTHDR_FRAGMENT) {
729 struct ipv6_opt_hdr hdr;
730 int hdrlen;
731
732 if (!ipv6_ext_hdr(nexthdr)) {
733 return -1;
734 }
735 if (len < (int)sizeof(struct ipv6_opt_hdr)) {
736 DEBUGP("too short\n");
737 return -1;
738 }
739 if (nexthdr == NEXTHDR_NONE) {
740 DEBUGP("next header is none\n");
741 return -1;
742 }
743 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
744 BUG();
745 if (nexthdr == NEXTHDR_AUTH)
746 hdrlen = (hdr.hdrlen+2)<<2;
747 else
748 hdrlen = ipv6_optlen(&hdr);
749
750 prevhdr = nexthdr;
751 prev_nhoff = start;
752
753 nexthdr = hdr.nexthdr;
754 len -= hdrlen;
755 start += hdrlen;
756 }
757
758 if (len < 0)
759 return -1;
760
761 *prevhdrp = prevhdr;
762 *prevhoff = prev_nhoff;
763 *fhoff = start;
764
765 return 0;
766}
767
768struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
769{
770 struct sk_buff *clone;
771 struct net_device *dev = skb->dev;
772 struct frag_hdr *fhdr;
773 struct nf_ct_frag6_queue *fq;
774 struct ipv6hdr *hdr;
775 int fhoff, nhoff;
776 u8 prevhdr;
777 struct sk_buff *ret_skb = NULL;
778
779 /* Jumbo payload inhibits frag. header */
780 if (skb->nh.ipv6h->payload_len == 0) {
781 DEBUGP("payload len = 0\n");
782 return skb;
783 }
784
785 if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
786 return skb;
787
788 clone = skb_clone(skb, GFP_ATOMIC);
789 if (clone == NULL) {
790 DEBUGP("Can't clone skb\n");
791 return skb;
792 }
793
794 NFCT_FRAG6_CB(clone)->orig = skb;
795
796 if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
797 DEBUGP("message is too short.\n");
798 goto ret_orig;
799 }
800
801 clone->h.raw = clone->data + fhoff;
802 hdr = clone->nh.ipv6h;
803 fhdr = (struct frag_hdr *)clone->h.raw;
804
805 if (!(fhdr->frag_off & htons(0xFFF9))) {
806 DEBUGP("Invalid fragment offset\n");
807 /* It is not a fragmented frame */
808 goto ret_orig;
809 }
810
811 if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
812 nf_ct_frag6_evictor();
813
814 fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
815 if (fq == NULL) {
816 DEBUGP("Can't find and can't create new queue\n");
817 goto ret_orig;
818 }
819
820 spin_lock(&fq->lock);
821
822 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
823 spin_unlock(&fq->lock);
824 DEBUGP("Can't insert skb to queue\n");
825 fq_put(fq, NULL);
826 goto ret_orig;
827 }
828
829 if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
830 ret_skb = nf_ct_frag6_reasm(fq, dev);
831 if (ret_skb == NULL)
832 DEBUGP("Can't reassemble fragmented packets\n");
833 }
834 spin_unlock(&fq->lock);
835
836 fq_put(fq, NULL);
837 return ret_skb;
838
839ret_orig:
840 kfree_skb(clone);
841 return skb;
842}
843
844void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
845 struct net_device *in, struct net_device *out,
846 int (*okfn)(struct sk_buff *))
847{
848 struct sk_buff *s, *s2;
849
850 for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
851 nf_conntrack_put_reasm(s->nfct_reasm);
852 nf_conntrack_get_reasm(skb);
853 s->nfct_reasm = skb;
854
855 s2 = s->next;
856 NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
857 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
858 s = s2;
859 }
860 nf_conntrack_put_reasm(skb);
861}
862
863int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
864{
865 struct sk_buff *s, *s2;
866
867 for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
868
869 s2 = s->next;
870 kfree_skb(s);
871 }
872
873 kfree_skb(skb);
874
875 return 0;
876}
877
878int nf_ct_frag6_init(void)
879{
880 nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
881 (jiffies ^ (jiffies >> 6)));
882
883 init_timer(&nf_ct_frag6_secret_timer);
884 nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
885 nf_ct_frag6_secret_timer.expires = jiffies
886 + nf_ct_frag6_secret_interval;
887 add_timer(&nf_ct_frag6_secret_timer);
888
889 return 0;
890}
891
892void nf_ct_frag6_cleanup(void)
893{
894 del_timer(&nf_ct_frag6_secret_timer);
895 nf_ct_frag6_low_thresh = 0;
896 nf_ct_frag6_evictor();
897}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 334a5967831e..50a13e75d70e 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -140,9 +140,7 @@ fold_field(void *mib[], int offt)
140 unsigned long res = 0; 140 unsigned long res = 0;
141 int i; 141 int i;
142 142
143 for (i = 0; i < NR_CPUS; i++) { 143 for_each_cpu(i) {
144 if (!cpu_possible(i))
145 continue;
146 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt); 144 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
147 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt); 145 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
148 } 146 }
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d77d3352c967..a66900cda2af 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
174 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 174 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
175 175
176 /* Not releasing hash table! */ 176 /* Not releasing hash table! */
177 if (clone) 177 if (clone) {
178 nf_reset(clone);
178 rawv6_rcv(sk, clone); 179 rawv6_rcv(sk, clone);
180 }
179 } 181 }
180 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, 182 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
181 IP6CB(skb)->iif); 183 IP6CB(skb)->iif);
@@ -296,13 +298,10 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
296static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 298static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
297{ 299{
298 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 300 if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
299 skb->ip_summed != CHECKSUM_UNNECESSARY) { 301 skb_checksum_complete(skb)) {
300 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { 302 /* FIXME: increment a raw6 drops counter here */
301 /* FIXME: increment a raw6 drops counter here */ 303 kfree_skb(skb);
302 kfree_skb(skb); 304 return 0;
303 return 0;
304 }
305 skb->ip_summed = CHECKSUM_UNNECESSARY;
306 } 305 }
307 306
308 /* Charge it to the socket. */ 307 /* Charge it to the socket. */
@@ -335,32 +334,25 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
335 if (!rp->checksum) 334 if (!rp->checksum)
336 skb->ip_summed = CHECKSUM_UNNECESSARY; 335 skb->ip_summed = CHECKSUM_UNNECESSARY;
337 336
338 if (skb->ip_summed != CHECKSUM_UNNECESSARY) { 337 if (skb->ip_summed == CHECKSUM_HW) {
339 if (skb->ip_summed == CHECKSUM_HW) { 338 skb_postpull_rcsum(skb, skb->nh.raw,
340 skb_postpull_rcsum(skb, skb->nh.raw, 339 skb->h.raw - skb->nh.raw);
341 skb->h.raw - skb->nh.raw); 340 if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
341 &skb->nh.ipv6h->daddr,
342 skb->len, inet->num, skb->csum))
342 skb->ip_summed = CHECKSUM_UNNECESSARY; 343 skb->ip_summed = CHECKSUM_UNNECESSARY;
343 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
344 &skb->nh.ipv6h->daddr,
345 skb->len, inet->num, skb->csum)) {
346 LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n");
347 skb->ip_summed = CHECKSUM_NONE;
348 }
349 }
350 if (skb->ip_summed == CHECKSUM_NONE)
351 skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
352 &skb->nh.ipv6h->daddr,
353 skb->len, inet->num, 0);
354 } 344 }
345 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
346 skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
347 &skb->nh.ipv6h->daddr,
348 skb->len, inet->num, 0);
355 349
356 if (inet->hdrincl) { 350 if (inet->hdrincl) {
357 if (skb->ip_summed != CHECKSUM_UNNECESSARY && 351 if (skb_checksum_complete(skb)) {
358 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
359 /* FIXME: increment a raw6 drops counter here */ 352 /* FIXME: increment a raw6 drops counter here */
360 kfree_skb(skb); 353 kfree_skb(skb);
361 return 0; 354 return 0;
362 } 355 }
363 skb->ip_summed = CHECKSUM_UNNECESSARY;
364 } 356 }
365 357
366 rawv6_rcv_skb(sk, skb); 358 rawv6_rcv_skb(sk, skb);
@@ -405,7 +397,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
405 if (skb->ip_summed==CHECKSUM_UNNECESSARY) { 397 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
406 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 398 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
407 } else if (msg->msg_flags&MSG_TRUNC) { 399 } else if (msg->msg_flags&MSG_TRUNC) {
408 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) 400 if (__skb_checksum_complete(skb))
409 goto csum_copy_err; 401 goto csum_copy_err;
410 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 402 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
411 } else { 403 } else {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e4fe9ee484dd..5d316cb72ec9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -74,7 +74,7 @@ struct ip6frag_skb_cb
74 74
75struct frag_queue 75struct frag_queue
76{ 76{
77 struct frag_queue *next; 77 struct hlist_node list;
78 struct list_head lru_list; /* lru list member */ 78 struct list_head lru_list; /* lru list member */
79 79
80 __u32 id; /* fragment id */ 80 __u32 id; /* fragment id */
@@ -95,14 +95,13 @@ struct frag_queue
95#define FIRST_IN 2 95#define FIRST_IN 2
96#define LAST_IN 1 96#define LAST_IN 1
97 __u16 nhoffset; 97 __u16 nhoffset;
98 struct frag_queue **pprev;
99}; 98};
100 99
101/* Hash table. */ 100/* Hash table. */
102 101
103#define IP6Q_HASHSZ 64 102#define IP6Q_HASHSZ 64
104 103
105static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ]; 104static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ];
106static DEFINE_RWLOCK(ip6_frag_lock); 105static DEFINE_RWLOCK(ip6_frag_lock);
107static u32 ip6_frag_hash_rnd; 106static u32 ip6_frag_hash_rnd;
108static LIST_HEAD(ip6_frag_lru_list); 107static LIST_HEAD(ip6_frag_lru_list);
@@ -110,9 +109,7 @@ int ip6_frag_nqueues = 0;
110 109
111static __inline__ void __fq_unlink(struct frag_queue *fq) 110static __inline__ void __fq_unlink(struct frag_queue *fq)
112{ 111{
113 if(fq->next) 112 hlist_del(&fq->list);
114 fq->next->pprev = fq->pprev;
115 *fq->pprev = fq->next;
116 list_del(&fq->lru_list); 113 list_del(&fq->lru_list);
117 ip6_frag_nqueues--; 114 ip6_frag_nqueues--;
118} 115}
@@ -163,28 +160,21 @@ static void ip6_frag_secret_rebuild(unsigned long dummy)
163 get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32)); 160 get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
164 for (i = 0; i < IP6Q_HASHSZ; i++) { 161 for (i = 0; i < IP6Q_HASHSZ; i++) {
165 struct frag_queue *q; 162 struct frag_queue *q;
163 struct hlist_node *p, *n;
166 164
167 q = ip6_frag_hash[i]; 165 hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) {
168 while (q) {
169 struct frag_queue *next = q->next;
170 unsigned int hval = ip6qhashfn(q->id, 166 unsigned int hval = ip6qhashfn(q->id,
171 &q->saddr, 167 &q->saddr,
172 &q->daddr); 168 &q->daddr);
173 169
174 if (hval != i) { 170 if (hval != i) {
175 /* Unlink. */ 171 hlist_del(&q->list);
176 if (q->next)
177 q->next->pprev = q->pprev;
178 *q->pprev = q->next;
179 172
180 /* Relink to new hash chain. */ 173 /* Relink to new hash chain. */
181 if ((q->next = ip6_frag_hash[hval]) != NULL) 174 hlist_add_head(&q->list,
182 q->next->pprev = &q->next; 175 &ip6_frag_hash[hval]);
183 ip6_frag_hash[hval] = q;
184 q->pprev = &ip6_frag_hash[hval];
185 }
186 176
187 q = next; 177 }
188 } 178 }
189 } 179 }
190 write_unlock(&ip6_frag_lock); 180 write_unlock(&ip6_frag_lock);
@@ -337,10 +327,13 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash,
337 struct frag_queue *fq_in) 327 struct frag_queue *fq_in)
338{ 328{
339 struct frag_queue *fq; 329 struct frag_queue *fq;
330#ifdef CONFIG_SMP
331 struct hlist_node *n;
332#endif
340 333
341 write_lock(&ip6_frag_lock); 334 write_lock(&ip6_frag_lock);
342#ifdef CONFIG_SMP 335#ifdef CONFIG_SMP
343 for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) { 336 hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
344 if (fq->id == fq_in->id && 337 if (fq->id == fq_in->id &&
345 ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && 338 ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
346 ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { 339 ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
@@ -358,10 +351,7 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash,
358 atomic_inc(&fq->refcnt); 351 atomic_inc(&fq->refcnt);
359 352
360 atomic_inc(&fq->refcnt); 353 atomic_inc(&fq->refcnt);
361 if((fq->next = ip6_frag_hash[hash]) != NULL) 354 hlist_add_head(&fq->list, &ip6_frag_hash[hash]);
362 fq->next->pprev = &fq->next;
363 ip6_frag_hash[hash] = fq;
364 fq->pprev = &ip6_frag_hash[hash];
365 INIT_LIST_HEAD(&fq->lru_list); 355 INIT_LIST_HEAD(&fq->lru_list);
366 list_add_tail(&fq->lru_list, &ip6_frag_lru_list); 356 list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
367 ip6_frag_nqueues++; 357 ip6_frag_nqueues++;
@@ -401,10 +391,11 @@ static __inline__ struct frag_queue *
401fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) 391fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
402{ 392{
403 struct frag_queue *fq; 393 struct frag_queue *fq;
394 struct hlist_node *n;
404 unsigned int hash = ip6qhashfn(id, src, dst); 395 unsigned int hash = ip6qhashfn(id, src, dst);
405 396
406 read_lock(&ip6_frag_lock); 397 read_lock(&ip6_frag_lock);
407 for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) { 398 hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
408 if (fq->id == id && 399 if (fq->id == id &&
409 ipv6_addr_equal(src, &fq->saddr) && 400 ipv6_addr_equal(src, &fq->saddr) &&
410 ipv6_addr_equal(dst, &fq->daddr)) { 401 ipv6_addr_equal(dst, &fq->daddr)) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5d5bbb49ec78..a7a537b50595 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -483,7 +483,7 @@ restart:
483 goto out; 483 goto out;
484 } 484 }
485 485
486 rt = rt6_device_match(rt, skb->dev->ifindex, 0); 486 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
487 BACKTRACK(); 487 BACKTRACK();
488 488
489 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 489 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
@@ -1701,16 +1701,14 @@ static void fib6_dump_end(struct netlink_callback *cb)
1701 fib6_walker_unlink(w); 1701 fib6_walker_unlink(w);
1702 kfree(w); 1702 kfree(w);
1703 } 1703 }
1704 if (cb->args[1]) { 1704 cb->done = (void*)cb->args[1];
1705 cb->done = (void*)cb->args[1]; 1705 cb->args[1] = 0;
1706 cb->args[1] = 0;
1707 }
1708} 1706}
1709 1707
1710static int fib6_dump_done(struct netlink_callback *cb) 1708static int fib6_dump_done(struct netlink_callback *cb)
1711{ 1709{
1712 fib6_dump_end(cb); 1710 fib6_dump_end(cb);
1713 return cb->done(cb); 1711 return cb->done ? cb->done(cb) : 0;
1714} 1712}
1715 1713
1716int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 1714int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d693cb988b78..62c0e5bd931c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
114 int low = sysctl_local_port_range[0]; 114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1]; 115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1; 116 int remaining = (high - low) + 1;
117 int rover; 117 int rover = net_random() % (high - low) + low;
118 118
119 spin_lock(&tcp_hashinfo.portalloc_lock); 119 do {
120 if (tcp_hashinfo.port_rover < low)
121 rover = low;
122 else
123 rover = tcp_hashinfo.port_rover;
124 do { rover++;
125 if (rover > high)
126 rover = low;
127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; 120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 spin_lock(&head->lock); 121 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain) 122 inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
132 break; 125 break;
133 next: 126 next:
134 spin_unlock(&head->lock); 127 spin_unlock(&head->lock);
128 if (++rover > high)
129 rover = low;
135 } while (--remaining > 0); 130 } while (--remaining > 0);
136 tcp_hashinfo.port_rover = rover;
137 spin_unlock(&tcp_hashinfo.portalloc_lock);
138 131
139 /* Exhausted local port range during search? It is not 132 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash 133 * possible for us to be holding one of the bind hash
@@ -1408,20 +1401,18 @@ out:
1408static int tcp_v6_checksum_init(struct sk_buff *skb) 1401static int tcp_v6_checksum_init(struct sk_buff *skb)
1409{ 1402{
1410 if (skb->ip_summed == CHECKSUM_HW) { 1403 if (skb->ip_summed == CHECKSUM_HW) {
1411 skb->ip_summed = CHECKSUM_UNNECESSARY;
1412 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1404 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1413 &skb->nh.ipv6h->daddr,skb->csum)) 1405 &skb->nh.ipv6h->daddr,skb->csum)) {
1406 skb->ip_summed = CHECKSUM_UNNECESSARY;
1414 return 0; 1407 return 0;
1415 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); 1408 }
1416 } 1409 }
1410
1411 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1412 &skb->nh.ipv6h->daddr, 0);
1413
1417 if (skb->len <= 76) { 1414 if (skb->len <= 76) {
1418 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1415 return __skb_checksum_complete(skb);
1419 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1420 return -1;
1421 skb->ip_summed = CHECKSUM_UNNECESSARY;
1422 } else {
1423 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1424 &skb->nh.ipv6h->daddr,0);
1425 } 1416 }
1426 return 0; 1417 return 0;
1427} 1418}
@@ -1582,7 +1573,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1582 goto discard_it; 1573 goto discard_it;
1583 1574
1584 if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1575 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1585 tcp_v6_checksum_init(skb) < 0)) 1576 tcp_v6_checksum_init(skb)))
1586 goto bad_packet; 1577 goto bad_packet;
1587 1578
1588 th = skb->h.th; 1579 th = skb->h.th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e2b87cc68b7b..5cc8731eb55b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -248,7 +248,7 @@ try_again:
248 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 248 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
249 copied); 249 copied);
250 } else if (msg->msg_flags&MSG_TRUNC) { 250 } else if (msg->msg_flags&MSG_TRUNC) {
251 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) 251 if (__skb_checksum_complete(skb))
252 goto csum_copy_err; 252 goto csum_copy_err;
253 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 253 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
254 copied); 254 copied);
@@ -363,13 +363,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
363 return -1; 363 return -1;
364 } 364 }
365 365
366 if (skb->ip_summed != CHECKSUM_UNNECESSARY) { 366 if (skb_checksum_complete(skb)) {
367 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { 367 UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
368 UDP6_INC_STATS_BH(UDP_MIB_INERRORS); 368 kfree_skb(skb);
369 kfree_skb(skb); 369 return 0;
370 return 0;
371 }
372 skb->ip_summed = CHECKSUM_UNNECESSARY;
373 } 370 }
374 371
375 if (sock_queue_rcv_skb(sk,skb)<0) { 372 if (sock_queue_rcv_skb(sk,skb)<0) {
@@ -491,13 +488,10 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
491 uh = skb->h.uh; 488 uh = skb->h.uh;
492 } 489 }
493 490
494 if (skb->ip_summed==CHECKSUM_HW) { 491 if (skb->ip_summed == CHECKSUM_HW &&
492 !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
495 skb->ip_summed = CHECKSUM_UNNECESSARY; 493 skb->ip_summed = CHECKSUM_UNNECESSARY;
496 if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { 494
497 LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n");
498 skb->ip_summed = CHECKSUM_NONE;
499 }
500 }
501 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 495 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
502 skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); 496 skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
503 497
@@ -521,8 +515,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
521 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 515 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
522 goto discard; 516 goto discard;
523 517
524 if (skb->ip_summed != CHECKSUM_UNNECESSARY && 518 if (skb_checksum_complete(skb))
525 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
526 goto discard; 519 goto discard;
527 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); 520 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS);
528 521