diff options
Diffstat (limited to 'net/ipv4')
85 files changed, 3923 insertions, 2882 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 70491d9035eb..0c94a1ac2946 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -166,7 +166,7 @@ config IP_PNP_DHCP | |||
166 | 166 | ||
167 | If unsure, say Y. Note that if you want to use DHCP, a DHCP server | 167 | If unsure, say Y. Note that if you want to use DHCP, a DHCP server |
168 | must be operating on your network. Read | 168 | must be operating on your network. Read |
169 | <file:Documentation/filesystems/nfsroot.txt> for details. | 169 | <file:Documentation/filesystems/nfs/nfsroot.txt> for details. |
170 | 170 | ||
171 | config IP_PNP_BOOTP | 171 | config IP_PNP_BOOTP |
172 | bool "IP: BOOTP support" | 172 | bool "IP: BOOTP support" |
@@ -181,7 +181,7 @@ config IP_PNP_BOOTP | |||
181 | does BOOTP itself, providing all necessary information on the kernel | 181 | does BOOTP itself, providing all necessary information on the kernel |
182 | command line, you can say N here. If unsure, say Y. Note that if you | 182 | command line, you can say N here. If unsure, say Y. Note that if you |
183 | want to use BOOTP, a BOOTP server must be operating on your network. | 183 | want to use BOOTP, a BOOTP server must be operating on your network. |
184 | Read <file:Documentation/filesystems/nfsroot.txt> for details. | 184 | Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details. |
185 | 185 | ||
186 | config IP_PNP_RARP | 186 | config IP_PNP_RARP |
187 | bool "IP: RARP support" | 187 | bool "IP: RARP support" |
@@ -194,7 +194,7 @@ config IP_PNP_RARP | |||
194 | older protocol which is being obsoleted by BOOTP and DHCP), say Y | 194 | older protocol which is being obsoleted by BOOTP and DHCP), say Y |
195 | here. Note that if you want to use RARP, a RARP server must be | 195 | here. Note that if you want to use RARP, a RARP server must be |
196 | operating on your network. Read | 196 | operating on your network. Read |
197 | <file:Documentation/filesystems/nfsroot.txt> for details. | 197 | <file:Documentation/filesystems/nfs/nfsroot.txt> for details. |
198 | 198 | ||
199 | # not yet ready.. | 199 | # not yet ready.. |
200 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP | 200 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 57737b8d1711..f71357422380 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/poll.h> | 86 | #include <linux/poll.h> |
87 | #include <linux/netfilter_ipv4.h> | 87 | #include <linux/netfilter_ipv4.h> |
88 | #include <linux/random.h> | 88 | #include <linux/random.h> |
89 | #include <linux/slab.h> | ||
89 | 90 | ||
90 | #include <asm/uaccess.h> | 91 | #include <asm/uaccess.h> |
91 | #include <asm/system.h> | 92 | #include <asm/system.h> |
@@ -174,12 +175,12 @@ static int inet_autobind(struct sock *sk) | |||
174 | /* We may need to bind the socket. */ | 175 | /* We may need to bind the socket. */ |
175 | lock_sock(sk); | 176 | lock_sock(sk); |
176 | inet = inet_sk(sk); | 177 | inet = inet_sk(sk); |
177 | if (!inet->num) { | 178 | if (!inet->inet_num) { |
178 | if (sk->sk_prot->get_port(sk, 0)) { | 179 | if (sk->sk_prot->get_port(sk, 0)) { |
179 | release_sock(sk); | 180 | release_sock(sk); |
180 | return -EAGAIN; | 181 | return -EAGAIN; |
181 | } | 182 | } |
182 | inet->sport = htons(inet->num); | 183 | inet->inet_sport = htons(inet->inet_num); |
183 | } | 184 | } |
184 | release_sock(sk); | 185 | release_sock(sk); |
185 | return 0; | 186 | return 0; |
@@ -262,7 +263,8 @@ static inline int inet_netns_ok(struct net *net, int protocol) | |||
262 | * Create an inet socket. | 263 | * Create an inet socket. |
263 | */ | 264 | */ |
264 | 265 | ||
265 | static int inet_create(struct net *net, struct socket *sock, int protocol) | 266 | static int inet_create(struct net *net, struct socket *sock, int protocol, |
267 | int kern) | ||
266 | { | 268 | { |
267 | struct sock *sk; | 269 | struct sock *sk; |
268 | struct inet_protosw *answer; | 270 | struct inet_protosw *answer; |
@@ -325,7 +327,7 @@ lookup_protocol: | |||
325 | } | 327 | } |
326 | 328 | ||
327 | err = -EPERM; | 329 | err = -EPERM; |
328 | if (answer->capability > 0 && !capable(answer->capability)) | 330 | if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) |
329 | goto out_rcu_unlock; | 331 | goto out_rcu_unlock; |
330 | 332 | ||
331 | err = -EAFNOSUPPORT; | 333 | err = -EAFNOSUPPORT; |
@@ -354,7 +356,7 @@ lookup_protocol: | |||
354 | inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; | 356 | inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; |
355 | 357 | ||
356 | if (SOCK_RAW == sock->type) { | 358 | if (SOCK_RAW == sock->type) { |
357 | inet->num = protocol; | 359 | inet->inet_num = protocol; |
358 | if (IPPROTO_RAW == protocol) | 360 | if (IPPROTO_RAW == protocol) |
359 | inet->hdrincl = 1; | 361 | inet->hdrincl = 1; |
360 | } | 362 | } |
@@ -364,7 +366,7 @@ lookup_protocol: | |||
364 | else | 366 | else |
365 | inet->pmtudisc = IP_PMTUDISC_WANT; | 367 | inet->pmtudisc = IP_PMTUDISC_WANT; |
366 | 368 | ||
367 | inet->id = 0; | 369 | inet->inet_id = 0; |
368 | 370 | ||
369 | sock_init_data(sock, sk); | 371 | sock_init_data(sock, sk); |
370 | 372 | ||
@@ -381,13 +383,13 @@ lookup_protocol: | |||
381 | 383 | ||
382 | sk_refcnt_debug_inc(sk); | 384 | sk_refcnt_debug_inc(sk); |
383 | 385 | ||
384 | if (inet->num) { | 386 | if (inet->inet_num) { |
385 | /* It assumes that any protocol which allows | 387 | /* It assumes that any protocol which allows |
386 | * the user to assign a number at socket | 388 | * the user to assign a number at socket |
387 | * creation time automatically | 389 | * creation time automatically |
388 | * shares. | 390 | * shares. |
389 | */ | 391 | */ |
390 | inet->sport = htons(inet->num); | 392 | inet->inet_sport = htons(inet->inet_num); |
391 | /* Add to protocol hash chains. */ | 393 | /* Add to protocol hash chains. */ |
392 | sk->sk_prot->hash(sk); | 394 | sk->sk_prot->hash(sk); |
393 | } | 395 | } |
@@ -494,27 +496,27 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
494 | 496 | ||
495 | /* Check these errors (active socket, double bind). */ | 497 | /* Check these errors (active socket, double bind). */ |
496 | err = -EINVAL; | 498 | err = -EINVAL; |
497 | if (sk->sk_state != TCP_CLOSE || inet->num) | 499 | if (sk->sk_state != TCP_CLOSE || inet->inet_num) |
498 | goto out_release_sock; | 500 | goto out_release_sock; |
499 | 501 | ||
500 | inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; | 502 | inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; |
501 | if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) | 503 | if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) |
502 | inet->saddr = 0; /* Use device */ | 504 | inet->inet_saddr = 0; /* Use device */ |
503 | 505 | ||
504 | /* Make sure we are allowed to bind here. */ | 506 | /* Make sure we are allowed to bind here. */ |
505 | if (sk->sk_prot->get_port(sk, snum)) { | 507 | if (sk->sk_prot->get_port(sk, snum)) { |
506 | inet->saddr = inet->rcv_saddr = 0; | 508 | inet->inet_saddr = inet->inet_rcv_saddr = 0; |
507 | err = -EADDRINUSE; | 509 | err = -EADDRINUSE; |
508 | goto out_release_sock; | 510 | goto out_release_sock; |
509 | } | 511 | } |
510 | 512 | ||
511 | if (inet->rcv_saddr) | 513 | if (inet->inet_rcv_saddr) |
512 | sk->sk_userlocks |= SOCK_BINDADDR_LOCK; | 514 | sk->sk_userlocks |= SOCK_BINDADDR_LOCK; |
513 | if (snum) | 515 | if (snum) |
514 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | 516 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; |
515 | inet->sport = htons(inet->num); | 517 | inet->inet_sport = htons(inet->inet_num); |
516 | inet->daddr = 0; | 518 | inet->inet_daddr = 0; |
517 | inet->dport = 0; | 519 | inet->inet_dport = 0; |
518 | sk_dst_reset(sk); | 520 | sk_dst_reset(sk); |
519 | err = 0; | 521 | err = 0; |
520 | out_release_sock: | 522 | out_release_sock: |
@@ -529,10 +531,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, | |||
529 | { | 531 | { |
530 | struct sock *sk = sock->sk; | 532 | struct sock *sk = sock->sk; |
531 | 533 | ||
534 | if (addr_len < sizeof(uaddr->sa_family)) | ||
535 | return -EINVAL; | ||
532 | if (uaddr->sa_family == AF_UNSPEC) | 536 | if (uaddr->sa_family == AF_UNSPEC) |
533 | return sk->sk_prot->disconnect(sk, flags); | 537 | return sk->sk_prot->disconnect(sk, flags); |
534 | 538 | ||
535 | if (!inet_sk(sk)->num && inet_autobind(sk)) | 539 | if (!inet_sk(sk)->inet_num && inet_autobind(sk)) |
536 | return -EAGAIN; | 540 | return -EAGAIN; |
537 | return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); | 541 | return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); |
538 | } | 542 | } |
@@ -572,6 +576,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | |||
572 | int err; | 576 | int err; |
573 | long timeo; | 577 | long timeo; |
574 | 578 | ||
579 | if (addr_len < sizeof(uaddr->sa_family)) | ||
580 | return -EINVAL; | ||
581 | |||
575 | lock_sock(sk); | 582 | lock_sock(sk); |
576 | 583 | ||
577 | if (uaddr->sa_family == AF_UNSPEC) { | 584 | if (uaddr->sa_family == AF_UNSPEC) { |
@@ -685,21 +692,21 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, | |||
685 | { | 692 | { |
686 | struct sock *sk = sock->sk; | 693 | struct sock *sk = sock->sk; |
687 | struct inet_sock *inet = inet_sk(sk); | 694 | struct inet_sock *inet = inet_sk(sk); |
688 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | 695 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); |
689 | 696 | ||
690 | sin->sin_family = AF_INET; | 697 | sin->sin_family = AF_INET; |
691 | if (peer) { | 698 | if (peer) { |
692 | if (!inet->dport || | 699 | if (!inet->inet_dport || |
693 | (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && | 700 | (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && |
694 | peer == 1)) | 701 | peer == 1)) |
695 | return -ENOTCONN; | 702 | return -ENOTCONN; |
696 | sin->sin_port = inet->dport; | 703 | sin->sin_port = inet->inet_dport; |
697 | sin->sin_addr.s_addr = inet->daddr; | 704 | sin->sin_addr.s_addr = inet->inet_daddr; |
698 | } else { | 705 | } else { |
699 | __be32 addr = inet->rcv_saddr; | 706 | __be32 addr = inet->inet_rcv_saddr; |
700 | if (!addr) | 707 | if (!addr) |
701 | addr = inet->saddr; | 708 | addr = inet->inet_saddr; |
702 | sin->sin_port = inet->sport; | 709 | sin->sin_port = inet->inet_sport; |
703 | sin->sin_addr.s_addr = addr; | 710 | sin->sin_addr.s_addr = addr; |
704 | } | 711 | } |
705 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | 712 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); |
@@ -714,7 +721,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
714 | struct sock *sk = sock->sk; | 721 | struct sock *sk = sock->sk; |
715 | 722 | ||
716 | /* We may need to bind the socket. */ | 723 | /* We may need to bind the socket. */ |
717 | if (!inet_sk(sk)->num && inet_autobind(sk)) | 724 | if (!inet_sk(sk)->inet_num && inet_autobind(sk)) |
718 | return -EAGAIN; | 725 | return -EAGAIN; |
719 | 726 | ||
720 | return sk->sk_prot->sendmsg(iocb, sk, msg, size); | 727 | return sk->sk_prot->sendmsg(iocb, sk, msg, size); |
@@ -728,7 +735,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, | |||
728 | struct sock *sk = sock->sk; | 735 | struct sock *sk = sock->sk; |
729 | 736 | ||
730 | /* We may need to bind the socket. */ | 737 | /* We may need to bind the socket. */ |
731 | if (!inet_sk(sk)->num && inet_autobind(sk)) | 738 | if (!inet_sk(sk)->inet_num && inet_autobind(sk)) |
732 | return -EAGAIN; | 739 | return -EAGAIN; |
733 | 740 | ||
734 | if (sk->sk_prot->sendpage) | 741 | if (sk->sk_prot->sendpage) |
@@ -931,7 +938,7 @@ static const struct proto_ops inet_sockraw_ops = { | |||
931 | #endif | 938 | #endif |
932 | }; | 939 | }; |
933 | 940 | ||
934 | static struct net_proto_family inet_family_ops = { | 941 | static const struct net_proto_family inet_family_ops = { |
935 | .family = PF_INET, | 942 | .family = PF_INET, |
936 | .create = inet_create, | 943 | .create = inet_create, |
937 | .owner = THIS_MODULE, | 944 | .owner = THIS_MODULE, |
@@ -947,7 +954,6 @@ static struct inet_protosw inetsw_array[] = | |||
947 | .protocol = IPPROTO_TCP, | 954 | .protocol = IPPROTO_TCP, |
948 | .prot = &tcp_prot, | 955 | .prot = &tcp_prot, |
949 | .ops = &inet_stream_ops, | 956 | .ops = &inet_stream_ops, |
950 | .capability = -1, | ||
951 | .no_check = 0, | 957 | .no_check = 0, |
952 | .flags = INET_PROTOSW_PERMANENT | | 958 | .flags = INET_PROTOSW_PERMANENT | |
953 | INET_PROTOSW_ICSK, | 959 | INET_PROTOSW_ICSK, |
@@ -958,7 +964,6 @@ static struct inet_protosw inetsw_array[] = | |||
958 | .protocol = IPPROTO_UDP, | 964 | .protocol = IPPROTO_UDP, |
959 | .prot = &udp_prot, | 965 | .prot = &udp_prot, |
960 | .ops = &inet_dgram_ops, | 966 | .ops = &inet_dgram_ops, |
961 | .capability = -1, | ||
962 | .no_check = UDP_CSUM_DEFAULT, | 967 | .no_check = UDP_CSUM_DEFAULT, |
963 | .flags = INET_PROTOSW_PERMANENT, | 968 | .flags = INET_PROTOSW_PERMANENT, |
964 | }, | 969 | }, |
@@ -969,7 +974,6 @@ static struct inet_protosw inetsw_array[] = | |||
969 | .protocol = IPPROTO_IP, /* wild card */ | 974 | .protocol = IPPROTO_IP, /* wild card */ |
970 | .prot = &raw_prot, | 975 | .prot = &raw_prot, |
971 | .ops = &inet_sockraw_ops, | 976 | .ops = &inet_sockraw_ops, |
972 | .capability = CAP_NET_RAW, | ||
973 | .no_check = UDP_CSUM_DEFAULT, | 977 | .no_check = UDP_CSUM_DEFAULT, |
974 | .flags = INET_PROTOSW_REUSE, | 978 | .flags = INET_PROTOSW_REUSE, |
975 | } | 979 | } |
@@ -1059,9 +1063,9 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1059 | struct inet_sock *inet = inet_sk(sk); | 1063 | struct inet_sock *inet = inet_sk(sk); |
1060 | int err; | 1064 | int err; |
1061 | struct rtable *rt; | 1065 | struct rtable *rt; |
1062 | __be32 old_saddr = inet->saddr; | 1066 | __be32 old_saddr = inet->inet_saddr; |
1063 | __be32 new_saddr; | 1067 | __be32 new_saddr; |
1064 | __be32 daddr = inet->daddr; | 1068 | __be32 daddr = inet->inet_daddr; |
1065 | 1069 | ||
1066 | if (inet->opt && inet->opt->srr) | 1070 | if (inet->opt && inet->opt->srr) |
1067 | daddr = inet->opt->faddr; | 1071 | daddr = inet->opt->faddr; |
@@ -1071,7 +1075,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1071 | RT_CONN_FLAGS(sk), | 1075 | RT_CONN_FLAGS(sk), |
1072 | sk->sk_bound_dev_if, | 1076 | sk->sk_bound_dev_if, |
1073 | sk->sk_protocol, | 1077 | sk->sk_protocol, |
1074 | inet->sport, inet->dport, sk, 0); | 1078 | inet->inet_sport, inet->inet_dport, sk, 0); |
1075 | if (err) | 1079 | if (err) |
1076 | return err; | 1080 | return err; |
1077 | 1081 | ||
@@ -1087,7 +1091,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1087 | __func__, &old_saddr, &new_saddr); | 1091 | __func__, &old_saddr, &new_saddr); |
1088 | } | 1092 | } |
1089 | 1093 | ||
1090 | inet->saddr = inet->rcv_saddr = new_saddr; | 1094 | inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; |
1091 | 1095 | ||
1092 | /* | 1096 | /* |
1093 | * XXX The only one ugly spot where we need to | 1097 | * XXX The only one ugly spot where we need to |
@@ -1113,7 +1117,7 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1113 | return 0; | 1117 | return 0; |
1114 | 1118 | ||
1115 | /* Reroute. */ | 1119 | /* Reroute. */ |
1116 | daddr = inet->daddr; | 1120 | daddr = inet->inet_daddr; |
1117 | if (inet->opt && inet->opt->srr) | 1121 | if (inet->opt && inet->opt->srr) |
1118 | daddr = inet->opt->faddr; | 1122 | daddr = inet->opt->faddr; |
1119 | { | 1123 | { |
@@ -1123,7 +1127,7 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1123 | .nl_u = { | 1127 | .nl_u = { |
1124 | .ip4_u = { | 1128 | .ip4_u = { |
1125 | .daddr = daddr, | 1129 | .daddr = daddr, |
1126 | .saddr = inet->saddr, | 1130 | .saddr = inet->inet_saddr, |
1127 | .tos = RT_CONN_FLAGS(sk), | 1131 | .tos = RT_CONN_FLAGS(sk), |
1128 | }, | 1132 | }, |
1129 | }, | 1133 | }, |
@@ -1131,8 +1135,8 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1131 | .flags = inet_sk_flowi_flags(sk), | 1135 | .flags = inet_sk_flowi_flags(sk), |
1132 | .uli_u = { | 1136 | .uli_u = { |
1133 | .ports = { | 1137 | .ports = { |
1134 | .sport = inet->sport, | 1138 | .sport = inet->inet_sport, |
1135 | .dport = inet->dport, | 1139 | .dport = inet->inet_dport, |
1136 | }, | 1140 | }, |
1137 | }, | 1141 | }, |
1138 | }; | 1142 | }; |
@@ -1387,7 +1391,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, | |||
1387 | } | 1391 | } |
1388 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); | 1392 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); |
1389 | 1393 | ||
1390 | unsigned long snmp_fold_field(void *mib[], int offt) | 1394 | unsigned long snmp_fold_field(void __percpu *mib[], int offt) |
1391 | { | 1395 | { |
1392 | unsigned long res = 0; | 1396 | unsigned long res = 0; |
1393 | int i; | 1397 | int i; |
@@ -1400,7 +1404,7 @@ unsigned long snmp_fold_field(void *mib[], int offt) | |||
1400 | } | 1404 | } |
1401 | EXPORT_SYMBOL_GPL(snmp_fold_field); | 1405 | EXPORT_SYMBOL_GPL(snmp_fold_field); |
1402 | 1406 | ||
1403 | int snmp_mib_init(void *ptr[2], size_t mibsize) | 1407 | int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) |
1404 | { | 1408 | { |
1405 | BUG_ON(ptr == NULL); | 1409 | BUG_ON(ptr == NULL); |
1406 | ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); | 1410 | ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); |
@@ -1418,7 +1422,7 @@ err0: | |||
1418 | } | 1422 | } |
1419 | EXPORT_SYMBOL_GPL(snmp_mib_init); | 1423 | EXPORT_SYMBOL_GPL(snmp_mib_init); |
1420 | 1424 | ||
1421 | void snmp_mib_free(void *ptr[2]) | 1425 | void snmp_mib_free(void __percpu *ptr[2]) |
1422 | { | 1426 | { |
1423 | BUG_ON(ptr == NULL); | 1427 | BUG_ON(ptr == NULL); |
1424 | free_percpu(ptr[0]); | 1428 | free_percpu(ptr[0]); |
@@ -1462,25 +1466,25 @@ static const struct net_protocol icmp_protocol = { | |||
1462 | 1466 | ||
1463 | static __net_init int ipv4_mib_init_net(struct net *net) | 1467 | static __net_init int ipv4_mib_init_net(struct net *net) |
1464 | { | 1468 | { |
1465 | if (snmp_mib_init((void **)net->mib.tcp_statistics, | 1469 | if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, |
1466 | sizeof(struct tcp_mib)) < 0) | 1470 | sizeof(struct tcp_mib)) < 0) |
1467 | goto err_tcp_mib; | 1471 | goto err_tcp_mib; |
1468 | if (snmp_mib_init((void **)net->mib.ip_statistics, | 1472 | if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, |
1469 | sizeof(struct ipstats_mib)) < 0) | 1473 | sizeof(struct ipstats_mib)) < 0) |
1470 | goto err_ip_mib; | 1474 | goto err_ip_mib; |
1471 | if (snmp_mib_init((void **)net->mib.net_statistics, | 1475 | if (snmp_mib_init((void __percpu **)net->mib.net_statistics, |
1472 | sizeof(struct linux_mib)) < 0) | 1476 | sizeof(struct linux_mib)) < 0) |
1473 | goto err_net_mib; | 1477 | goto err_net_mib; |
1474 | if (snmp_mib_init((void **)net->mib.udp_statistics, | 1478 | if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, |
1475 | sizeof(struct udp_mib)) < 0) | 1479 | sizeof(struct udp_mib)) < 0) |
1476 | goto err_udp_mib; | 1480 | goto err_udp_mib; |
1477 | if (snmp_mib_init((void **)net->mib.udplite_statistics, | 1481 | if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, |
1478 | sizeof(struct udp_mib)) < 0) | 1482 | sizeof(struct udp_mib)) < 0) |
1479 | goto err_udplite_mib; | 1483 | goto err_udplite_mib; |
1480 | if (snmp_mib_init((void **)net->mib.icmp_statistics, | 1484 | if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, |
1481 | sizeof(struct icmp_mib)) < 0) | 1485 | sizeof(struct icmp_mib)) < 0) |
1482 | goto err_icmp_mib; | 1486 | goto err_icmp_mib; |
1483 | if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, | 1487 | if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, |
1484 | sizeof(struct icmpmsg_mib)) < 0) | 1488 | sizeof(struct icmpmsg_mib)) < 0) |
1485 | goto err_icmpmsg_mib; | 1489 | goto err_icmpmsg_mib; |
1486 | 1490 | ||
@@ -1488,30 +1492,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) | |||
1488 | return 0; | 1492 | return 0; |
1489 | 1493 | ||
1490 | err_icmpmsg_mib: | 1494 | err_icmpmsg_mib: |
1491 | snmp_mib_free((void **)net->mib.icmp_statistics); | 1495 | snmp_mib_free((void __percpu **)net->mib.icmp_statistics); |
1492 | err_icmp_mib: | 1496 | err_icmp_mib: |
1493 | snmp_mib_free((void **)net->mib.udplite_statistics); | 1497 | snmp_mib_free((void __percpu **)net->mib.udplite_statistics); |
1494 | err_udplite_mib: | 1498 | err_udplite_mib: |
1495 | snmp_mib_free((void **)net->mib.udp_statistics); | 1499 | snmp_mib_free((void __percpu **)net->mib.udp_statistics); |
1496 | err_udp_mib: | 1500 | err_udp_mib: |
1497 | snmp_mib_free((void **)net->mib.net_statistics); | 1501 | snmp_mib_free((void __percpu **)net->mib.net_statistics); |
1498 | err_net_mib: | 1502 | err_net_mib: |
1499 | snmp_mib_free((void **)net->mib.ip_statistics); | 1503 | snmp_mib_free((void __percpu **)net->mib.ip_statistics); |
1500 | err_ip_mib: | 1504 | err_ip_mib: |
1501 | snmp_mib_free((void **)net->mib.tcp_statistics); | 1505 | snmp_mib_free((void __percpu **)net->mib.tcp_statistics); |
1502 | err_tcp_mib: | 1506 | err_tcp_mib: |
1503 | return -ENOMEM; | 1507 | return -ENOMEM; |
1504 | } | 1508 | } |
1505 | 1509 | ||
1506 | static __net_exit void ipv4_mib_exit_net(struct net *net) | 1510 | static __net_exit void ipv4_mib_exit_net(struct net *net) |
1507 | { | 1511 | { |
1508 | snmp_mib_free((void **)net->mib.icmpmsg_statistics); | 1512 | snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); |
1509 | snmp_mib_free((void **)net->mib.icmp_statistics); | 1513 | snmp_mib_free((void __percpu **)net->mib.icmp_statistics); |
1510 | snmp_mib_free((void **)net->mib.udplite_statistics); | 1514 | snmp_mib_free((void __percpu **)net->mib.udplite_statistics); |
1511 | snmp_mib_free((void **)net->mib.udp_statistics); | 1515 | snmp_mib_free((void __percpu **)net->mib.udp_statistics); |
1512 | snmp_mib_free((void **)net->mib.net_statistics); | 1516 | snmp_mib_free((void __percpu **)net->mib.net_statistics); |
1513 | snmp_mib_free((void **)net->mib.ip_statistics); | 1517 | snmp_mib_free((void __percpu **)net->mib.ip_statistics); |
1514 | snmp_mib_free((void **)net->mib.tcp_statistics); | 1518 | snmp_mib_free((void __percpu **)net->mib.tcp_statistics); |
1515 | } | 1519 | } |
1516 | 1520 | ||
1517 | static __net_initdata struct pernet_operations ipv4_mib_ops = { | 1521 | static __net_initdata struct pernet_operations ipv4_mib_ops = { |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 5c662703eb1e..880a5ec6dce0 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -1,14 +1,73 @@ | |||
1 | #include <crypto/hash.h> | ||
1 | #include <linux/err.h> | 2 | #include <linux/err.h> |
2 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/slab.h> | ||
3 | #include <net/ip.h> | 5 | #include <net/ip.h> |
4 | #include <net/xfrm.h> | 6 | #include <net/xfrm.h> |
5 | #include <net/ah.h> | 7 | #include <net/ah.h> |
6 | #include <linux/crypto.h> | 8 | #include <linux/crypto.h> |
7 | #include <linux/pfkeyv2.h> | 9 | #include <linux/pfkeyv2.h> |
8 | #include <linux/spinlock.h> | 10 | #include <linux/scatterlist.h> |
9 | #include <net/icmp.h> | 11 | #include <net/icmp.h> |
10 | #include <net/protocol.h> | 12 | #include <net/protocol.h> |
11 | 13 | ||
14 | struct ah_skb_cb { | ||
15 | struct xfrm_skb_cb xfrm; | ||
16 | void *tmp; | ||
17 | }; | ||
18 | |||
19 | #define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0])) | ||
20 | |||
21 | static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, | ||
22 | unsigned int size) | ||
23 | { | ||
24 | unsigned int len; | ||
25 | |||
26 | len = size + crypto_ahash_digestsize(ahash) + | ||
27 | (crypto_ahash_alignmask(ahash) & | ||
28 | ~(crypto_tfm_ctx_alignment() - 1)); | ||
29 | |||
30 | len = ALIGN(len, crypto_tfm_ctx_alignment()); | ||
31 | |||
32 | len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash); | ||
33 | len = ALIGN(len, __alignof__(struct scatterlist)); | ||
34 | |||
35 | len += sizeof(struct scatterlist) * nfrags; | ||
36 | |||
37 | return kmalloc(len, GFP_ATOMIC); | ||
38 | } | ||
39 | |||
40 | static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset) | ||
41 | { | ||
42 | return tmp + offset; | ||
43 | } | ||
44 | |||
45 | static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp, | ||
46 | unsigned int offset) | ||
47 | { | ||
48 | return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1); | ||
49 | } | ||
50 | |||
51 | static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, | ||
52 | u8 *icv) | ||
53 | { | ||
54 | struct ahash_request *req; | ||
55 | |||
56 | req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash), | ||
57 | crypto_tfm_ctx_alignment()); | ||
58 | |||
59 | ahash_request_set_tfm(req, ahash); | ||
60 | |||
61 | return req; | ||
62 | } | ||
63 | |||
64 | static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, | ||
65 | struct ahash_request *req) | ||
66 | { | ||
67 | return (void *)ALIGN((unsigned long)(req + 1) + | ||
68 | crypto_ahash_reqsize(ahash), | ||
69 | __alignof__(struct scatterlist)); | ||
70 | } | ||
12 | 71 | ||
13 | /* Clear mutable options and find final destination to substitute | 72 | /* Clear mutable options and find final destination to substitute |
14 | * into IP header for icv calculation. Options are already checked | 73 | * into IP header for icv calculation. Options are already checked |
@@ -54,20 +113,72 @@ static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) | |||
54 | return 0; | 113 | return 0; |
55 | } | 114 | } |
56 | 115 | ||
116 | static void ah_output_done(struct crypto_async_request *base, int err) | ||
117 | { | ||
118 | u8 *icv; | ||
119 | struct iphdr *iph; | ||
120 | struct sk_buff *skb = base->data; | ||
121 | struct xfrm_state *x = skb_dst(skb)->xfrm; | ||
122 | struct ah_data *ahp = x->data; | ||
123 | struct iphdr *top_iph = ip_hdr(skb); | ||
124 | struct ip_auth_hdr *ah = ip_auth_hdr(skb); | ||
125 | int ihl = ip_hdrlen(skb); | ||
126 | |||
127 | iph = AH_SKB_CB(skb)->tmp; | ||
128 | icv = ah_tmp_icv(ahp->ahash, iph, ihl); | ||
129 | memcpy(ah->auth_data, icv, ahp->icv_trunc_len); | ||
130 | |||
131 | top_iph->tos = iph->tos; | ||
132 | top_iph->ttl = iph->ttl; | ||
133 | top_iph->frag_off = iph->frag_off; | ||
134 | if (top_iph->ihl != 5) { | ||
135 | top_iph->daddr = iph->daddr; | ||
136 | memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); | ||
137 | } | ||
138 | |||
139 | err = ah->nexthdr; | ||
140 | |||
141 | kfree(AH_SKB_CB(skb)->tmp); | ||
142 | xfrm_output_resume(skb, err); | ||
143 | } | ||
144 | |||
57 | static int ah_output(struct xfrm_state *x, struct sk_buff *skb) | 145 | static int ah_output(struct xfrm_state *x, struct sk_buff *skb) |
58 | { | 146 | { |
59 | int err; | 147 | int err; |
148 | int nfrags; | ||
149 | int ihl; | ||
150 | u8 *icv; | ||
151 | struct sk_buff *trailer; | ||
152 | struct crypto_ahash *ahash; | ||
153 | struct ahash_request *req; | ||
154 | struct scatterlist *sg; | ||
60 | struct iphdr *iph, *top_iph; | 155 | struct iphdr *iph, *top_iph; |
61 | struct ip_auth_hdr *ah; | 156 | struct ip_auth_hdr *ah; |
62 | struct ah_data *ahp; | 157 | struct ah_data *ahp; |
63 | union { | 158 | |
64 | struct iphdr iph; | 159 | ahp = x->data; |
65 | char buf[60]; | 160 | ahash = ahp->ahash; |
66 | } tmp_iph; | 161 | |
162 | if ((err = skb_cow_data(skb, 0, &trailer)) < 0) | ||
163 | goto out; | ||
164 | nfrags = err; | ||
67 | 165 | ||
68 | skb_push(skb, -skb_network_offset(skb)); | 166 | skb_push(skb, -skb_network_offset(skb)); |
167 | ah = ip_auth_hdr(skb); | ||
168 | ihl = ip_hdrlen(skb); | ||
169 | |||
170 | err = -ENOMEM; | ||
171 | iph = ah_alloc_tmp(ahash, nfrags, ihl); | ||
172 | if (!iph) | ||
173 | goto out; | ||
174 | |||
175 | icv = ah_tmp_icv(ahash, iph, ihl); | ||
176 | req = ah_tmp_req(ahash, icv); | ||
177 | sg = ah_req_sg(ahash, req); | ||
178 | |||
179 | memset(ah->auth_data, 0, ahp->icv_trunc_len); | ||
180 | |||
69 | top_iph = ip_hdr(skb); | 181 | top_iph = ip_hdr(skb); |
70 | iph = &tmp_iph.iph; | ||
71 | 182 | ||
72 | iph->tos = top_iph->tos; | 183 | iph->tos = top_iph->tos; |
73 | iph->ttl = top_iph->ttl; | 184 | iph->ttl = top_iph->ttl; |
@@ -78,10 +189,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) | |||
78 | memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); | 189 | memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); |
79 | err = ip_clear_mutable_options(top_iph, &top_iph->daddr); | 190 | err = ip_clear_mutable_options(top_iph, &top_iph->daddr); |
80 | if (err) | 191 | if (err) |
81 | goto error; | 192 | goto out_free; |
82 | } | 193 | } |
83 | 194 | ||
84 | ah = ip_auth_hdr(skb); | ||
85 | ah->nexthdr = *skb_mac_header(skb); | 195 | ah->nexthdr = *skb_mac_header(skb); |
86 | *skb_mac_header(skb) = IPPROTO_AH; | 196 | *skb_mac_header(skb) = IPPROTO_AH; |
87 | 197 | ||
@@ -91,20 +201,31 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) | |||
91 | top_iph->ttl = 0; | 201 | top_iph->ttl = 0; |
92 | top_iph->check = 0; | 202 | top_iph->check = 0; |
93 | 203 | ||
94 | ahp = x->data; | ||
95 | ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; | 204 | ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; |
96 | 205 | ||
97 | ah->reserved = 0; | 206 | ah->reserved = 0; |
98 | ah->spi = x->id.spi; | 207 | ah->spi = x->id.spi; |
99 | ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); | 208 | ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); |
100 | 209 | ||
101 | spin_lock_bh(&x->lock); | 210 | sg_init_table(sg, nfrags); |
102 | err = ah_mac_digest(ahp, skb, ah->auth_data); | 211 | skb_to_sgvec(skb, sg, 0, skb->len); |
103 | memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); | ||
104 | spin_unlock_bh(&x->lock); | ||
105 | 212 | ||
106 | if (err) | 213 | ahash_request_set_crypt(req, sg, icv, skb->len); |
107 | goto error; | 214 | ahash_request_set_callback(req, 0, ah_output_done, skb); |
215 | |||
216 | AH_SKB_CB(skb)->tmp = iph; | ||
217 | |||
218 | err = crypto_ahash_digest(req); | ||
219 | if (err) { | ||
220 | if (err == -EINPROGRESS) | ||
221 | goto out; | ||
222 | |||
223 | if (err == -EBUSY) | ||
224 | err = NET_XMIT_DROP; | ||
225 | goto out_free; | ||
226 | } | ||
227 | |||
228 | memcpy(ah->auth_data, icv, ahp->icv_trunc_len); | ||
108 | 229 | ||
109 | top_iph->tos = iph->tos; | 230 | top_iph->tos = iph->tos; |
110 | top_iph->ttl = iph->ttl; | 231 | top_iph->ttl = iph->ttl; |
@@ -114,28 +235,67 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) | |||
114 | memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); | 235 | memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); |
115 | } | 236 | } |
116 | 237 | ||
117 | err = 0; | 238 | out_free: |
118 | 239 | kfree(iph); | |
119 | error: | 240 | out: |
120 | return err; | 241 | return err; |
121 | } | 242 | } |
122 | 243 | ||
244 | static void ah_input_done(struct crypto_async_request *base, int err) | ||
245 | { | ||
246 | u8 *auth_data; | ||
247 | u8 *icv; | ||
248 | struct iphdr *work_iph; | ||
249 | struct sk_buff *skb = base->data; | ||
250 | struct xfrm_state *x = xfrm_input_state(skb); | ||
251 | struct ah_data *ahp = x->data; | ||
252 | struct ip_auth_hdr *ah = ip_auth_hdr(skb); | ||
253 | int ihl = ip_hdrlen(skb); | ||
254 | int ah_hlen = (ah->hdrlen + 2) << 2; | ||
255 | |||
256 | work_iph = AH_SKB_CB(skb)->tmp; | ||
257 | auth_data = ah_tmp_auth(work_iph, ihl); | ||
258 | icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); | ||
259 | |||
260 | err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; | ||
261 | if (err) | ||
262 | goto out; | ||
263 | |||
264 | skb->network_header += ah_hlen; | ||
265 | memcpy(skb_network_header(skb), work_iph, ihl); | ||
266 | __skb_pull(skb, ah_hlen + ihl); | ||
267 | skb_set_transport_header(skb, -ihl); | ||
268 | |||
269 | err = ah->nexthdr; | ||
270 | out: | ||
271 | kfree(AH_SKB_CB(skb)->tmp); | ||
272 | xfrm_input_resume(skb, err); | ||
273 | } | ||
274 | |||
123 | static int ah_input(struct xfrm_state *x, struct sk_buff *skb) | 275 | static int ah_input(struct xfrm_state *x, struct sk_buff *skb) |
124 | { | 276 | { |
125 | int ah_hlen; | 277 | int ah_hlen; |
126 | int ihl; | 278 | int ihl; |
127 | int nexthdr; | 279 | int nexthdr; |
128 | int err = -EINVAL; | 280 | int nfrags; |
129 | struct iphdr *iph; | 281 | u8 *auth_data; |
282 | u8 *icv; | ||
283 | struct sk_buff *trailer; | ||
284 | struct crypto_ahash *ahash; | ||
285 | struct ahash_request *req; | ||
286 | struct scatterlist *sg; | ||
287 | struct iphdr *iph, *work_iph; | ||
130 | struct ip_auth_hdr *ah; | 288 | struct ip_auth_hdr *ah; |
131 | struct ah_data *ahp; | 289 | struct ah_data *ahp; |
132 | char work_buf[60]; | 290 | int err = -ENOMEM; |
133 | 291 | ||
134 | if (!pskb_may_pull(skb, sizeof(*ah))) | 292 | if (!pskb_may_pull(skb, sizeof(*ah))) |
135 | goto out; | 293 | goto out; |
136 | 294 | ||
137 | ah = (struct ip_auth_hdr *)skb->data; | 295 | ah = (struct ip_auth_hdr *)skb->data; |
138 | ahp = x->data; | 296 | ahp = x->data; |
297 | ahash = ahp->ahash; | ||
298 | |||
139 | nexthdr = ah->nexthdr; | 299 | nexthdr = ah->nexthdr; |
140 | ah_hlen = (ah->hdrlen + 2) << 2; | 300 | ah_hlen = (ah->hdrlen + 2) << 2; |
141 | 301 | ||
@@ -156,9 +316,24 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) | |||
156 | 316 | ||
157 | ah = (struct ip_auth_hdr *)skb->data; | 317 | ah = (struct ip_auth_hdr *)skb->data; |
158 | iph = ip_hdr(skb); | 318 | iph = ip_hdr(skb); |
319 | ihl = ip_hdrlen(skb); | ||
320 | |||
321 | if ((err = skb_cow_data(skb, 0, &trailer)) < 0) | ||
322 | goto out; | ||
323 | nfrags = err; | ||
324 | |||
325 | work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len); | ||
326 | if (!work_iph) | ||
327 | goto out; | ||
328 | |||
329 | auth_data = ah_tmp_auth(work_iph, ihl); | ||
330 | icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); | ||
331 | req = ah_tmp_req(ahash, icv); | ||
332 | sg = ah_req_sg(ahash, req); | ||
159 | 333 | ||
160 | ihl = skb->data - skb_network_header(skb); | 334 | memcpy(work_iph, iph, ihl); |
161 | memcpy(work_buf, iph, ihl); | 335 | memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); |
336 | memset(ah->auth_data, 0, ahp->icv_trunc_len); | ||
162 | 337 | ||
163 | iph->ttl = 0; | 338 | iph->ttl = 0; |
164 | iph->tos = 0; | 339 | iph->tos = 0; |
@@ -166,35 +341,44 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) | |||
166 | iph->check = 0; | 341 | iph->check = 0; |
167 | if (ihl > sizeof(*iph)) { | 342 | if (ihl > sizeof(*iph)) { |
168 | __be32 dummy; | 343 | __be32 dummy; |
169 | if (ip_clear_mutable_options(iph, &dummy)) | 344 | err = ip_clear_mutable_options(iph, &dummy); |
170 | goto out; | 345 | if (err) |
346 | goto out_free; | ||
171 | } | 347 | } |
172 | 348 | ||
173 | spin_lock(&x->lock); | 349 | skb_push(skb, ihl); |
174 | { | ||
175 | u8 auth_data[MAX_AH_AUTH_LEN]; | ||
176 | 350 | ||
177 | memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); | 351 | sg_init_table(sg, nfrags); |
178 | skb_push(skb, ihl); | 352 | skb_to_sgvec(skb, sg, 0, skb->len); |
179 | err = ah_mac_digest(ahp, skb, ah->auth_data); | 353 | |
180 | if (err) | 354 | ahash_request_set_crypt(req, sg, icv, skb->len); |
181 | goto unlock; | 355 | ahash_request_set_callback(req, 0, ah_input_done, skb); |
182 | if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) | 356 | |
183 | err = -EBADMSG; | 357 | AH_SKB_CB(skb)->tmp = work_iph; |
358 | |||
359 | err = crypto_ahash_digest(req); | ||
360 | if (err) { | ||
361 | if (err == -EINPROGRESS) | ||
362 | goto out; | ||
363 | |||
364 | if (err == -EBUSY) | ||
365 | err = NET_XMIT_DROP; | ||
366 | goto out_free; | ||
184 | } | 367 | } |
185 | unlock: | ||
186 | spin_unlock(&x->lock); | ||
187 | 368 | ||
369 | err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; | ||
188 | if (err) | 370 | if (err) |
189 | goto out; | 371 | goto out_free; |
190 | 372 | ||
191 | skb->network_header += ah_hlen; | 373 | skb->network_header += ah_hlen; |
192 | memcpy(skb_network_header(skb), work_buf, ihl); | 374 | memcpy(skb_network_header(skb), work_iph, ihl); |
193 | skb->transport_header = skb->network_header; | ||
194 | __skb_pull(skb, ah_hlen + ihl); | 375 | __skb_pull(skb, ah_hlen + ihl); |
376 | skb_set_transport_header(skb, -ihl); | ||
195 | 377 | ||
196 | return nexthdr; | 378 | err = nexthdr; |
197 | 379 | ||
380 | out_free: | ||
381 | kfree (work_iph); | ||
198 | out: | 382 | out: |
199 | return err; | 383 | return err; |
200 | } | 384 | } |
@@ -210,7 +394,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
210 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) | 394 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
211 | return; | 395 | return; |
212 | 396 | ||
213 | x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); | 397 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); |
214 | if (!x) | 398 | if (!x) |
215 | return; | 399 | return; |
216 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", | 400 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", |
@@ -222,7 +406,7 @@ static int ah_init_state(struct xfrm_state *x) | |||
222 | { | 406 | { |
223 | struct ah_data *ahp = NULL; | 407 | struct ah_data *ahp = NULL; |
224 | struct xfrm_algo_desc *aalg_desc; | 408 | struct xfrm_algo_desc *aalg_desc; |
225 | struct crypto_hash *tfm; | 409 | struct crypto_ahash *ahash; |
226 | 410 | ||
227 | if (!x->aalg) | 411 | if (!x->aalg) |
228 | goto error; | 412 | goto error; |
@@ -231,44 +415,40 @@ static int ah_init_state(struct xfrm_state *x) | |||
231 | goto error; | 415 | goto error; |
232 | 416 | ||
233 | ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); | 417 | ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); |
234 | if (ahp == NULL) | 418 | if (!ahp) |
235 | return -ENOMEM; | 419 | return -ENOMEM; |
236 | 420 | ||
237 | tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); | 421 | ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); |
238 | if (IS_ERR(tfm)) | 422 | if (IS_ERR(ahash)) |
239 | goto error; | 423 | goto error; |
240 | 424 | ||
241 | ahp->tfm = tfm; | 425 | ahp->ahash = ahash; |
242 | if (crypto_hash_setkey(tfm, x->aalg->alg_key, | 426 | if (crypto_ahash_setkey(ahash, x->aalg->alg_key, |
243 | (x->aalg->alg_key_len + 7) / 8)) | 427 | (x->aalg->alg_key_len + 7) / 8)) |
244 | goto error; | 428 | goto error; |
245 | 429 | ||
246 | /* | 430 | /* |
247 | * Lookup the algorithm description maintained by xfrm_algo, | 431 | * Lookup the algorithm description maintained by xfrm_algo, |
248 | * verify crypto transform properties, and store information | 432 | * verify crypto transform properties, and store information |
249 | * we need for AH processing. This lookup cannot fail here | 433 | * we need for AH processing. This lookup cannot fail here |
250 | * after a successful crypto_alloc_hash(). | 434 | * after a successful crypto_alloc_ahash(). |
251 | */ | 435 | */ |
252 | aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); | 436 | aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); |
253 | BUG_ON(!aalg_desc); | 437 | BUG_ON(!aalg_desc); |
254 | 438 | ||
255 | if (aalg_desc->uinfo.auth.icv_fullbits/8 != | 439 | if (aalg_desc->uinfo.auth.icv_fullbits/8 != |
256 | crypto_hash_digestsize(tfm)) { | 440 | crypto_ahash_digestsize(ahash)) { |
257 | printk(KERN_INFO "AH: %s digestsize %u != %hu\n", | 441 | printk(KERN_INFO "AH: %s digestsize %u != %hu\n", |
258 | x->aalg->alg_name, crypto_hash_digestsize(tfm), | 442 | x->aalg->alg_name, crypto_ahash_digestsize(ahash), |
259 | aalg_desc->uinfo.auth.icv_fullbits/8); | 443 | aalg_desc->uinfo.auth.icv_fullbits/8); |
260 | goto error; | 444 | goto error; |
261 | } | 445 | } |
262 | 446 | ||
263 | ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; | 447 | ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; |
264 | ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; | 448 | ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; |
265 | 449 | ||
266 | BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); | 450 | BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); |
267 | 451 | ||
268 | ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL); | ||
269 | if (!ahp->work_icv) | ||
270 | goto error; | ||
271 | |||
272 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + | 452 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + |
273 | ahp->icv_trunc_len); | 453 | ahp->icv_trunc_len); |
274 | if (x->props.mode == XFRM_MODE_TUNNEL) | 454 | if (x->props.mode == XFRM_MODE_TUNNEL) |
@@ -279,8 +459,7 @@ static int ah_init_state(struct xfrm_state *x) | |||
279 | 459 | ||
280 | error: | 460 | error: |
281 | if (ahp) { | 461 | if (ahp) { |
282 | kfree(ahp->work_icv); | 462 | crypto_free_ahash(ahp->ahash); |
283 | crypto_free_hash(ahp->tfm); | ||
284 | kfree(ahp); | 463 | kfree(ahp); |
285 | } | 464 | } |
286 | return -EINVAL; | 465 | return -EINVAL; |
@@ -293,8 +472,7 @@ static void ah_destroy(struct xfrm_state *x) | |||
293 | if (!ahp) | 472 | if (!ahp) |
294 | return; | 473 | return; |
295 | 474 | ||
296 | kfree(ahp->work_icv); | 475 | crypto_free_ahash(ahp->ahash); |
297 | crypto_free_hash(ahp->tfm); | ||
298 | kfree(ahp); | 476 | kfree(ahp); |
299 | } | 477 | } |
300 | 478 | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4e80f336c0cf..80769f1f9fab 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -70,6 +70,7 @@ | |||
70 | * bonding can change the skb before | 70 | * bonding can change the skb before |
71 | * sending (e.g. insert 8021q tag). | 71 | * sending (e.g. insert 8021q tag). |
72 | * Harald Welte : convert to make use of jenkins hash | 72 | * Harald Welte : convert to make use of jenkins hash |
73 | * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. | ||
73 | */ | 74 | */ |
74 | 75 | ||
75 | #include <linux/module.h> | 76 | #include <linux/module.h> |
@@ -97,6 +98,7 @@ | |||
97 | #include <linux/net.h> | 98 | #include <linux/net.h> |
98 | #include <linux/rcupdate.h> | 99 | #include <linux/rcupdate.h> |
99 | #include <linux/jhash.h> | 100 | #include <linux/jhash.h> |
101 | #include <linux/slab.h> | ||
100 | #ifdef CONFIG_SYSCTL | 102 | #ifdef CONFIG_SYSCTL |
101 | #include <linux/sysctl.h> | 103 | #include <linux/sysctl.h> |
102 | #endif | 104 | #endif |
@@ -524,12 +526,15 @@ int arp_bind_neighbour(struct dst_entry *dst) | |||
524 | /* | 526 | /* |
525 | * Check if we can use proxy ARP for this path | 527 | * Check if we can use proxy ARP for this path |
526 | */ | 528 | */ |
527 | 529 | static inline int arp_fwd_proxy(struct in_device *in_dev, | |
528 | static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) | 530 | struct net_device *dev, struct rtable *rt) |
529 | { | 531 | { |
530 | struct in_device *out_dev; | 532 | struct in_device *out_dev; |
531 | int imi, omi = -1; | 533 | int imi, omi = -1; |
532 | 534 | ||
535 | if (rt->u.dst.dev == dev) | ||
536 | return 0; | ||
537 | |||
533 | if (!IN_DEV_PROXY_ARP(in_dev)) | 538 | if (!IN_DEV_PROXY_ARP(in_dev)) |
534 | return 0; | 539 | return 0; |
535 | 540 | ||
@@ -548,6 +553,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) | |||
548 | } | 553 | } |
549 | 554 | ||
550 | /* | 555 | /* |
556 | * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) | ||
557 | * | ||
558 | * RFC3069 supports proxy arp replies back to the same interface. This | ||
559 | * is done to support (ethernet) switch features, like RFC 3069, where | ||
560 | * the individual ports are not allowed to communicate with each | ||
561 | * other, BUT they are allowed to talk to the upstream router. As | ||
562 | * described in RFC 3069, it is possible to allow these hosts to | ||
563 | * communicate through the upstream router, by proxy_arp'ing. | ||
564 | * | ||
565 | * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" | ||
566 | * | ||
567 | * This technology is known by different names: | ||
568 | * In RFC 3069 it is called VLAN Aggregation. | ||
569 | * Cisco and Allied Telesyn call it Private VLAN. | ||
570 | * Hewlett-Packard call it Source-Port filtering or port-isolation. | ||
571 | * Ericsson call it MAC-Forced Forwarding (RFC Draft). | ||
572 | * | ||
573 | */ | ||
574 | static inline int arp_fwd_pvlan(struct in_device *in_dev, | ||
575 | struct net_device *dev, struct rtable *rt, | ||
576 | __be32 sip, __be32 tip) | ||
577 | { | ||
578 | /* Private VLAN is only concerned about the same ethernet segment */ | ||
579 | if (rt->u.dst.dev != dev) | ||
580 | return 0; | ||
581 | |||
582 | /* Don't reply on self probes (often done by windowz boxes)*/ | ||
583 | if (sip == tip) | ||
584 | return 0; | ||
585 | |||
586 | if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) | ||
587 | return 1; | ||
588 | else | ||
589 | return 0; | ||
590 | } | ||
591 | |||
592 | /* | ||
551 | * Interface to link layer: send routine and receive handler. | 593 | * Interface to link layer: send routine and receive handler. |
552 | */ | 594 | */ |
553 | 595 | ||
@@ -619,13 +661,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
619 | #endif | 661 | #endif |
620 | #endif | 662 | #endif |
621 | 663 | ||
622 | #ifdef CONFIG_FDDI | 664 | #if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) |
623 | case ARPHRD_FDDI: | 665 | case ARPHRD_FDDI: |
624 | arp->ar_hrd = htons(ARPHRD_ETHER); | 666 | arp->ar_hrd = htons(ARPHRD_ETHER); |
625 | arp->ar_pro = htons(ETH_P_IP); | 667 | arp->ar_pro = htons(ETH_P_IP); |
626 | break; | 668 | break; |
627 | #endif | 669 | #endif |
628 | #ifdef CONFIG_TR | 670 | #if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) |
629 | case ARPHRD_IEEE802_TR: | 671 | case ARPHRD_IEEE802_TR: |
630 | arp->ar_hrd = htons(ARPHRD_IEEE802); | 672 | arp->ar_hrd = htons(ARPHRD_IEEE802); |
631 | arp->ar_pro = htons(ETH_P_IP); | 673 | arp->ar_pro = htons(ETH_P_IP); |
@@ -833,8 +875,11 @@ static int arp_process(struct sk_buff *skb) | |||
833 | } | 875 | } |
834 | goto out; | 876 | goto out; |
835 | } else if (IN_DEV_FORWARD(in_dev)) { | 877 | } else if (IN_DEV_FORWARD(in_dev)) { |
836 | if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && | 878 | if (addr_type == RTN_UNICAST && |
837 | (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { | 879 | (arp_fwd_proxy(in_dev, dev, rt) || |
880 | arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || | ||
881 | pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) | ||
882 | { | ||
838 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); | 883 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); |
839 | if (n) | 884 | if (n) |
840 | neigh_release(n); | 885 | neigh_release(n); |
@@ -863,7 +908,8 @@ static int arp_process(struct sk_buff *skb) | |||
863 | devices (strip is candidate) | 908 | devices (strip is candidate) |
864 | */ | 909 | */ |
865 | if (n == NULL && | 910 | if (n == NULL && |
866 | arp->ar_op == htons(ARPOP_REPLY) && | 911 | (arp->ar_op == htons(ARPOP_REPLY) || |
912 | (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) && | ||
867 | inet_addr_type(net, sip) == RTN_UNICAST) | 913 | inet_addr_type(net, sip) == RTN_UNICAST) |
868 | n = __neigh_lookup(&arp_tbl, &sip, dev, 1); | 914 | n = __neigh_lookup(&arp_tbl, &sip, dev, 1); |
869 | } | 915 | } |
@@ -1005,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, | |||
1005 | return -EINVAL; | 1051 | return -EINVAL; |
1006 | } | 1052 | } |
1007 | switch (dev->type) { | 1053 | switch (dev->type) { |
1008 | #ifdef CONFIG_FDDI | 1054 | #if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) |
1009 | case ARPHRD_FDDI: | 1055 | case ARPHRD_FDDI: |
1010 | /* | 1056 | /* |
1011 | * According to RFC 1390, FDDI devices should accept ARP | 1057 | * According to RFC 1390, FDDI devices should accept ARP |
@@ -1239,8 +1285,7 @@ void __init arp_init(void) | |||
1239 | dev_add_pack(&arp_packet_type); | 1285 | dev_add_pack(&arp_packet_type); |
1240 | arp_proc_init(); | 1286 | arp_proc_init(); |
1241 | #ifdef CONFIG_SYSCTL | 1287 | #ifdef CONFIG_SYSCTL |
1242 | neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, | 1288 | neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL); |
1243 | NET_IPV4_NEIGH, "ipv4", NULL, NULL); | ||
1244 | #endif | 1289 | #endif |
1245 | register_netdevice_notifier(&arp_netdev_notifier); | 1290 | register_netdevice_notifier(&arp_netdev_notifier); |
1246 | } | 1291 | } |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 039cc1ffe977..c97cd9ff697e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/string.h> | 44 | #include <linux/string.h> |
45 | #include <linux/jhash.h> | 45 | #include <linux/jhash.h> |
46 | #include <linux/audit.h> | 46 | #include <linux/audit.h> |
47 | #include <linux/slab.h> | ||
47 | #include <net/ip.h> | 48 | #include <net/ip.h> |
48 | #include <net/icmp.h> | 49 | #include <net/icmp.h> |
49 | #include <net/tcp.h> | 50 | #include <net/tcp.h> |
@@ -2017,7 +2018,7 @@ req_setattr_failure: | |||
2017 | * values on failure. | 2018 | * values on failure. |
2018 | * | 2019 | * |
2019 | */ | 2020 | */ |
2020 | int cipso_v4_delopt(struct ip_options **opt_ptr) | 2021 | static int cipso_v4_delopt(struct ip_options **opt_ptr) |
2021 | { | 2022 | { |
2022 | int hdr_delta = 0; | 2023 | int hdr_delta = 0; |
2023 | struct ip_options *opt = *opt_ptr; | 2024 | struct ip_options *opt = *opt_ptr; |
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 5e6c5a0f3fde..fb2465811b48 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c | |||
@@ -39,7 +39,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
39 | sk_dst_reset(sk); | 39 | sk_dst_reset(sk); |
40 | 40 | ||
41 | oif = sk->sk_bound_dev_if; | 41 | oif = sk->sk_bound_dev_if; |
42 | saddr = inet->saddr; | 42 | saddr = inet->inet_saddr; |
43 | if (ipv4_is_multicast(usin->sin_addr.s_addr)) { | 43 | if (ipv4_is_multicast(usin->sin_addr.s_addr)) { |
44 | if (!oif) | 44 | if (!oif) |
45 | oif = inet->mc_index; | 45 | oif = inet->mc_index; |
@@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
49 | err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, | 49 | err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, |
50 | RT_CONN_FLAGS(sk), oif, | 50 | RT_CONN_FLAGS(sk), oif, |
51 | sk->sk_protocol, | 51 | sk->sk_protocol, |
52 | inet->sport, usin->sin_port, sk, 1); | 52 | inet->inet_sport, usin->sin_port, sk, 1); |
53 | if (err) { | 53 | if (err) { |
54 | if (err == -ENETUNREACH) | 54 | if (err == -ENETUNREACH) |
55 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 55 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
@@ -60,14 +60,14 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
60 | ip_rt_put(rt); | 60 | ip_rt_put(rt); |
61 | return -EACCES; | 61 | return -EACCES; |
62 | } | 62 | } |
63 | if (!inet->saddr) | 63 | if (!inet->inet_saddr) |
64 | inet->saddr = rt->rt_src; /* Update source address */ | 64 | inet->inet_saddr = rt->rt_src; /* Update source address */ |
65 | if (!inet->rcv_saddr) | 65 | if (!inet->inet_rcv_saddr) |
66 | inet->rcv_saddr = rt->rt_src; | 66 | inet->inet_rcv_saddr = rt->rt_src; |
67 | inet->daddr = rt->rt_dst; | 67 | inet->inet_daddr = rt->rt_dst; |
68 | inet->dport = usin->sin_port; | 68 | inet->inet_dport = usin->sin_port; |
69 | sk->sk_state = TCP_ESTABLISHED; | 69 | sk->sk_state = TCP_ESTABLISHED; |
70 | inet->id = jiffies; | 70 | inet->inet_id = jiffies; |
71 | 71 | ||
72 | sk_dst_set(sk, &rt->u.dst); | 72 | sk_dst_set(sk, &rt->u.dst); |
73 | return(0); | 73 | return(0); |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5df2f6a0b0f0..90e3d6379a42 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/notifier.h> | 50 | #include <linux/notifier.h> |
51 | #include <linux/inetdevice.h> | 51 | #include <linux/inetdevice.h> |
52 | #include <linux/igmp.h> | 52 | #include <linux/igmp.h> |
53 | #include <linux/slab.h> | ||
53 | #ifdef CONFIG_SYSCTL | 54 | #ifdef CONFIG_SYSCTL |
54 | #include <linux/sysctl.h> | 55 | #include <linux/sysctl.h> |
55 | #endif | 56 | #endif |
@@ -64,20 +65,20 @@ | |||
64 | 65 | ||
65 | static struct ipv4_devconf ipv4_devconf = { | 66 | static struct ipv4_devconf ipv4_devconf = { |
66 | .data = { | 67 | .data = { |
67 | [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, | 68 | [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, |
68 | [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, | 69 | [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, |
69 | [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, | 70 | [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, |
70 | [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, | 71 | [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, |
71 | }, | 72 | }, |
72 | }; | 73 | }; |
73 | 74 | ||
74 | static struct ipv4_devconf ipv4_devconf_dflt = { | 75 | static struct ipv4_devconf ipv4_devconf_dflt = { |
75 | .data = { | 76 | .data = { |
76 | [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, | 77 | [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, |
77 | [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, | 78 | [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, |
78 | [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, | 79 | [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, |
79 | [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, | 80 | [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, |
80 | [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1, | 81 | [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, |
81 | }, | 82 | }, |
82 | }; | 83 | }; |
83 | 84 | ||
@@ -140,11 +141,11 @@ void in_dev_finish_destroy(struct in_device *idev) | |||
140 | #endif | 141 | #endif |
141 | dev_put(dev); | 142 | dev_put(dev); |
142 | if (!idev->dead) | 143 | if (!idev->dead) |
143 | printk("Freeing alive in_device %p\n", idev); | 144 | pr_err("Freeing alive in_device %p\n", idev); |
144 | else { | 145 | else |
145 | kfree(idev); | 146 | kfree(idev); |
146 | } | ||
147 | } | 147 | } |
148 | EXPORT_SYMBOL(in_dev_finish_destroy); | ||
148 | 149 | ||
149 | static struct in_device *inetdev_init(struct net_device *dev) | 150 | static struct in_device *inetdev_init(struct net_device *dev) |
150 | { | 151 | { |
@@ -159,7 +160,8 @@ static struct in_device *inetdev_init(struct net_device *dev) | |||
159 | sizeof(in_dev->cnf)); | 160 | sizeof(in_dev->cnf)); |
160 | in_dev->cnf.sysctl = NULL; | 161 | in_dev->cnf.sysctl = NULL; |
161 | in_dev->dev = dev; | 162 | in_dev->dev = dev; |
162 | if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) | 163 | in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); |
164 | if (!in_dev->arp_parms) | ||
163 | goto out_kfree; | 165 | goto out_kfree; |
164 | if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) | 166 | if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) |
165 | dev_disable_lro(dev); | 167 | dev_disable_lro(dev); |
@@ -405,13 +407,15 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex) | |||
405 | { | 407 | { |
406 | struct net_device *dev; | 408 | struct net_device *dev; |
407 | struct in_device *in_dev = NULL; | 409 | struct in_device *in_dev = NULL; |
408 | read_lock(&dev_base_lock); | 410 | |
409 | dev = __dev_get_by_index(net, ifindex); | 411 | rcu_read_lock(); |
412 | dev = dev_get_by_index_rcu(net, ifindex); | ||
410 | if (dev) | 413 | if (dev) |
411 | in_dev = in_dev_get(dev); | 414 | in_dev = in_dev_get(dev); |
412 | read_unlock(&dev_base_lock); | 415 | rcu_read_unlock(); |
413 | return in_dev; | 416 | return in_dev; |
414 | } | 417 | } |
418 | EXPORT_SYMBOL(inetdev_by_index); | ||
415 | 419 | ||
416 | /* Called only from RTNL semaphored context. No locks. */ | 420 | /* Called only from RTNL semaphored context. No locks. */ |
417 | 421 | ||
@@ -557,7 +561,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg | |||
557 | * Determine a default network mask, based on the IP address. | 561 | * Determine a default network mask, based on the IP address. |
558 | */ | 562 | */ |
559 | 563 | ||
560 | static __inline__ int inet_abc_len(__be32 addr) | 564 | static inline int inet_abc_len(__be32 addr) |
561 | { | 565 | { |
562 | int rc = -1; /* Something else, probably a multicast. */ | 566 | int rc = -1; /* Something else, probably a multicast. */ |
563 | 567 | ||
@@ -646,13 +650,15 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
646 | rtnl_lock(); | 650 | rtnl_lock(); |
647 | 651 | ||
648 | ret = -ENODEV; | 652 | ret = -ENODEV; |
649 | if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) | 653 | dev = __dev_get_by_name(net, ifr.ifr_name); |
654 | if (!dev) | ||
650 | goto done; | 655 | goto done; |
651 | 656 | ||
652 | if (colon) | 657 | if (colon) |
653 | *colon = ':'; | 658 | *colon = ':'; |
654 | 659 | ||
655 | if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { | 660 | in_dev = __in_dev_get_rtnl(dev); |
661 | if (in_dev) { | ||
656 | if (tryaddrmatch) { | 662 | if (tryaddrmatch) { |
657 | /* Matthias Andree */ | 663 | /* Matthias Andree */ |
658 | /* compare label and address (4.4BSD style) */ | 664 | /* compare label and address (4.4BSD style) */ |
@@ -720,7 +726,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
720 | 726 | ||
721 | if (!ifa) { | 727 | if (!ifa) { |
722 | ret = -ENOBUFS; | 728 | ret = -ENOBUFS; |
723 | if ((ifa = inet_alloc_ifa()) == NULL) | 729 | ifa = inet_alloc_ifa(); |
730 | if (!ifa) | ||
724 | break; | 731 | break; |
725 | if (colon) | 732 | if (colon) |
726 | memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); | 733 | memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); |
@@ -822,10 +829,10 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) | |||
822 | struct ifreq ifr; | 829 | struct ifreq ifr; |
823 | int done = 0; | 830 | int done = 0; |
824 | 831 | ||
825 | if (!in_dev || (ifa = in_dev->ifa_list) == NULL) | 832 | if (!in_dev) |
826 | goto out; | 833 | goto out; |
827 | 834 | ||
828 | for (; ifa; ifa = ifa->ifa_next) { | 835 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { |
829 | if (!buf) { | 836 | if (!buf) { |
830 | done += sizeof(ifr); | 837 | done += sizeof(ifr); |
831 | continue; | 838 | continue; |
@@ -875,36 +882,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) | |||
875 | if (!addr) | 882 | if (!addr) |
876 | addr = ifa->ifa_local; | 883 | addr = ifa->ifa_local; |
877 | } endfor_ifa(in_dev); | 884 | } endfor_ifa(in_dev); |
878 | no_in_dev: | ||
879 | rcu_read_unlock(); | ||
880 | 885 | ||
881 | if (addr) | 886 | if (addr) |
882 | goto out; | 887 | goto out_unlock; |
888 | no_in_dev: | ||
883 | 889 | ||
884 | /* Not loopback addresses on loopback should be preferred | 890 | /* Not loopback addresses on loopback should be preferred |
885 | in this case. It is importnat that lo is the first interface | 891 | in this case. It is importnat that lo is the first interface |
886 | in dev_base list. | 892 | in dev_base list. |
887 | */ | 893 | */ |
888 | read_lock(&dev_base_lock); | 894 | for_each_netdev_rcu(net, dev) { |
889 | rcu_read_lock(); | 895 | in_dev = __in_dev_get_rcu(dev); |
890 | for_each_netdev(net, dev) { | 896 | if (!in_dev) |
891 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) | ||
892 | continue; | 897 | continue; |
893 | 898 | ||
894 | for_primary_ifa(in_dev) { | 899 | for_primary_ifa(in_dev) { |
895 | if (ifa->ifa_scope != RT_SCOPE_LINK && | 900 | if (ifa->ifa_scope != RT_SCOPE_LINK && |
896 | ifa->ifa_scope <= scope) { | 901 | ifa->ifa_scope <= scope) { |
897 | addr = ifa->ifa_local; | 902 | addr = ifa->ifa_local; |
898 | goto out_unlock_both; | 903 | goto out_unlock; |
899 | } | 904 | } |
900 | } endfor_ifa(in_dev); | 905 | } endfor_ifa(in_dev); |
901 | } | 906 | } |
902 | out_unlock_both: | 907 | out_unlock: |
903 | read_unlock(&dev_base_lock); | ||
904 | rcu_read_unlock(); | 908 | rcu_read_unlock(); |
905 | out: | ||
906 | return addr; | 909 | return addr; |
907 | } | 910 | } |
911 | EXPORT_SYMBOL(inet_select_addr); | ||
908 | 912 | ||
909 | static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, | 913 | static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, |
910 | __be32 local, int scope) | 914 | __be32 local, int scope) |
@@ -940,7 +944,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, | |||
940 | } | 944 | } |
941 | } endfor_ifa(in_dev); | 945 | } endfor_ifa(in_dev); |
942 | 946 | ||
943 | return same? addr : 0; | 947 | return same ? addr : 0; |
944 | } | 948 | } |
945 | 949 | ||
946 | /* | 950 | /* |
@@ -961,17 +965,16 @@ __be32 inet_confirm_addr(struct in_device *in_dev, | |||
961 | return confirm_addr_indev(in_dev, dst, local, scope); | 965 | return confirm_addr_indev(in_dev, dst, local, scope); |
962 | 966 | ||
963 | net = dev_net(in_dev->dev); | 967 | net = dev_net(in_dev->dev); |
964 | read_lock(&dev_base_lock); | ||
965 | rcu_read_lock(); | 968 | rcu_read_lock(); |
966 | for_each_netdev(net, dev) { | 969 | for_each_netdev_rcu(net, dev) { |
967 | if ((in_dev = __in_dev_get_rcu(dev))) { | 970 | in_dev = __in_dev_get_rcu(dev); |
971 | if (in_dev) { | ||
968 | addr = confirm_addr_indev(in_dev, dst, local, scope); | 972 | addr = confirm_addr_indev(in_dev, dst, local, scope); |
969 | if (addr) | 973 | if (addr) |
970 | break; | 974 | break; |
971 | } | 975 | } |
972 | } | 976 | } |
973 | rcu_read_unlock(); | 977 | rcu_read_unlock(); |
974 | read_unlock(&dev_base_lock); | ||
975 | 978 | ||
976 | return addr; | 979 | return addr; |
977 | } | 980 | } |
@@ -984,14 +987,16 @@ int register_inetaddr_notifier(struct notifier_block *nb) | |||
984 | { | 987 | { |
985 | return blocking_notifier_chain_register(&inetaddr_chain, nb); | 988 | return blocking_notifier_chain_register(&inetaddr_chain, nb); |
986 | } | 989 | } |
990 | EXPORT_SYMBOL(register_inetaddr_notifier); | ||
987 | 991 | ||
988 | int unregister_inetaddr_notifier(struct notifier_block *nb) | 992 | int unregister_inetaddr_notifier(struct notifier_block *nb) |
989 | { | 993 | { |
990 | return blocking_notifier_chain_unregister(&inetaddr_chain, nb); | 994 | return blocking_notifier_chain_unregister(&inetaddr_chain, nb); |
991 | } | 995 | } |
996 | EXPORT_SYMBOL(unregister_inetaddr_notifier); | ||
992 | 997 | ||
993 | /* Rename ifa_labels for a device name change. Make some effort to preserve existing | 998 | /* Rename ifa_labels for a device name change. Make some effort to preserve |
994 | * alias numbering and to create unique labels if possible. | 999 | * existing alias numbering and to create unique labels if possible. |
995 | */ | 1000 | */ |
996 | static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) | 1001 | static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) |
997 | { | 1002 | { |
@@ -1010,11 +1015,10 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) | |||
1010 | sprintf(old, ":%d", named); | 1015 | sprintf(old, ":%d", named); |
1011 | dot = old; | 1016 | dot = old; |
1012 | } | 1017 | } |
1013 | if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) { | 1018 | if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) |
1014 | strcat(ifa->ifa_label, dot); | 1019 | strcat(ifa->ifa_label, dot); |
1015 | } else { | 1020 | else |
1016 | strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); | 1021 | strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); |
1017 | } | ||
1018 | skip: | 1022 | skip: |
1019 | rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); | 1023 | rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); |
1020 | } | 1024 | } |
@@ -1061,8 +1065,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1061 | if (!inetdev_valid_mtu(dev->mtu)) | 1065 | if (!inetdev_valid_mtu(dev->mtu)) |
1062 | break; | 1066 | break; |
1063 | if (dev->flags & IFF_LOOPBACK) { | 1067 | if (dev->flags & IFF_LOOPBACK) { |
1064 | struct in_ifaddr *ifa; | 1068 | struct in_ifaddr *ifa = inet_alloc_ifa(); |
1065 | if ((ifa = inet_alloc_ifa()) != NULL) { | 1069 | |
1070 | if (ifa) { | ||
1066 | ifa->ifa_local = | 1071 | ifa->ifa_local = |
1067 | ifa->ifa_address = htonl(INADDR_LOOPBACK); | 1072 | ifa->ifa_address = htonl(INADDR_LOOPBACK); |
1068 | ifa->ifa_prefixlen = 8; | 1073 | ifa->ifa_prefixlen = 8; |
@@ -1170,38 +1175,54 @@ nla_put_failure: | |||
1170 | static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | 1175 | static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) |
1171 | { | 1176 | { |
1172 | struct net *net = sock_net(skb->sk); | 1177 | struct net *net = sock_net(skb->sk); |
1173 | int idx, ip_idx; | 1178 | int h, s_h; |
1179 | int idx, s_idx; | ||
1180 | int ip_idx, s_ip_idx; | ||
1174 | struct net_device *dev; | 1181 | struct net_device *dev; |
1175 | struct in_device *in_dev; | 1182 | struct in_device *in_dev; |
1176 | struct in_ifaddr *ifa; | 1183 | struct in_ifaddr *ifa; |
1177 | int s_ip_idx, s_idx = cb->args[0]; | 1184 | struct hlist_head *head; |
1185 | struct hlist_node *node; | ||
1178 | 1186 | ||
1179 | s_ip_idx = ip_idx = cb->args[1]; | 1187 | s_h = cb->args[0]; |
1180 | idx = 0; | 1188 | s_idx = idx = cb->args[1]; |
1181 | for_each_netdev(net, dev) { | 1189 | s_ip_idx = ip_idx = cb->args[2]; |
1182 | if (idx < s_idx) | 1190 | |
1183 | goto cont; | 1191 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { |
1184 | if (idx > s_idx) | 1192 | idx = 0; |
1185 | s_ip_idx = 0; | 1193 | head = &net->dev_index_head[h]; |
1186 | if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) | 1194 | rcu_read_lock(); |
1187 | goto cont; | 1195 | hlist_for_each_entry_rcu(dev, node, head, index_hlist) { |
1188 | 1196 | if (idx < s_idx) | |
1189 | for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; | 1197 | goto cont; |
1190 | ifa = ifa->ifa_next, ip_idx++) { | 1198 | if (h > s_h || idx > s_idx) |
1191 | if (ip_idx < s_ip_idx) | 1199 | s_ip_idx = 0; |
1192 | continue; | 1200 | in_dev = __in_dev_get_rcu(dev); |
1193 | if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, | 1201 | if (!in_dev) |
1202 | goto cont; | ||
1203 | |||
1204 | for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; | ||
1205 | ifa = ifa->ifa_next, ip_idx++) { | ||
1206 | if (ip_idx < s_ip_idx) | ||
1207 | continue; | ||
1208 | if (inet_fill_ifaddr(skb, ifa, | ||
1209 | NETLINK_CB(cb->skb).pid, | ||
1194 | cb->nlh->nlmsg_seq, | 1210 | cb->nlh->nlmsg_seq, |
1195 | RTM_NEWADDR, NLM_F_MULTI) <= 0) | 1211 | RTM_NEWADDR, NLM_F_MULTI) <= 0) { |
1196 | goto done; | 1212 | rcu_read_unlock(); |
1197 | } | 1213 | goto done; |
1214 | } | ||
1215 | } | ||
1198 | cont: | 1216 | cont: |
1199 | idx++; | 1217 | idx++; |
1218 | } | ||
1219 | rcu_read_unlock(); | ||
1200 | } | 1220 | } |
1201 | 1221 | ||
1202 | done: | 1222 | done: |
1203 | cb->args[0] = idx; | 1223 | cb->args[0] = h; |
1204 | cb->args[1] = ip_idx; | 1224 | cb->args[1] = idx; |
1225 | cb->args[2] = ip_idx; | ||
1205 | 1226 | ||
1206 | return skb->len; | 1227 | return skb->len; |
1207 | } | 1228 | } |
@@ -1239,18 +1260,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i) | |||
1239 | { | 1260 | { |
1240 | struct net_device *dev; | 1261 | struct net_device *dev; |
1241 | 1262 | ||
1242 | read_lock(&dev_base_lock); | 1263 | rcu_read_lock(); |
1243 | for_each_netdev(net, dev) { | 1264 | for_each_netdev_rcu(net, dev) { |
1244 | struct in_device *in_dev; | 1265 | struct in_device *in_dev; |
1245 | rcu_read_lock(); | 1266 | |
1246 | in_dev = __in_dev_get_rcu(dev); | 1267 | in_dev = __in_dev_get_rcu(dev); |
1247 | if (in_dev && !test_bit(i, in_dev->cnf.state)) | 1268 | if (in_dev && !test_bit(i, in_dev->cnf.state)) |
1248 | in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; | 1269 | in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; |
1249 | rcu_read_unlock(); | ||
1250 | } | 1270 | } |
1251 | read_unlock(&dev_base_lock); | 1271 | rcu_read_unlock(); |
1252 | } | 1272 | } |
1253 | 1273 | ||
1274 | /* called with RTNL locked */ | ||
1254 | static void inet_forward_change(struct net *net) | 1275 | static void inet_forward_change(struct net *net) |
1255 | { | 1276 | { |
1256 | struct net_device *dev; | 1277 | struct net_device *dev; |
@@ -1259,7 +1280,6 @@ static void inet_forward_change(struct net *net) | |||
1259 | IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; | 1280 | IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; |
1260 | IPV4_DEVCONF_DFLT(net, FORWARDING) = on; | 1281 | IPV4_DEVCONF_DFLT(net, FORWARDING) = on; |
1261 | 1282 | ||
1262 | read_lock(&dev_base_lock); | ||
1263 | for_each_netdev(net, dev) { | 1283 | for_each_netdev(net, dev) { |
1264 | struct in_device *in_dev; | 1284 | struct in_device *in_dev; |
1265 | if (on) | 1285 | if (on) |
@@ -1270,7 +1290,6 @@ static void inet_forward_change(struct net *net) | |||
1270 | IN_DEV_CONF_SET(in_dev, FORWARDING, on); | 1290 | IN_DEV_CONF_SET(in_dev, FORWARDING, on); |
1271 | rcu_read_unlock(); | 1291 | rcu_read_unlock(); |
1272 | } | 1292 | } |
1273 | read_unlock(&dev_base_lock); | ||
1274 | } | 1293 | } |
1275 | 1294 | ||
1276 | static int devinet_conf_proc(ctl_table *ctl, int write, | 1295 | static int devinet_conf_proc(ctl_table *ctl, int write, |
@@ -1293,72 +1312,25 @@ static int devinet_conf_proc(ctl_table *ctl, int write, | |||
1293 | return ret; | 1312 | return ret; |
1294 | } | 1313 | } |
1295 | 1314 | ||
1296 | static int devinet_conf_sysctl(ctl_table *table, | ||
1297 | void __user *oldval, size_t __user *oldlenp, | ||
1298 | void __user *newval, size_t newlen) | ||
1299 | { | ||
1300 | struct ipv4_devconf *cnf; | ||
1301 | struct net *net; | ||
1302 | int *valp = table->data; | ||
1303 | int new; | ||
1304 | int i; | ||
1305 | |||
1306 | if (!newval || !newlen) | ||
1307 | return 0; | ||
1308 | |||
1309 | if (newlen != sizeof(int)) | ||
1310 | return -EINVAL; | ||
1311 | |||
1312 | if (get_user(new, (int __user *)newval)) | ||
1313 | return -EFAULT; | ||
1314 | |||
1315 | if (new == *valp) | ||
1316 | return 0; | ||
1317 | |||
1318 | if (oldval && oldlenp) { | ||
1319 | size_t len; | ||
1320 | |||
1321 | if (get_user(len, oldlenp)) | ||
1322 | return -EFAULT; | ||
1323 | |||
1324 | if (len) { | ||
1325 | if (len > table->maxlen) | ||
1326 | len = table->maxlen; | ||
1327 | if (copy_to_user(oldval, valp, len)) | ||
1328 | return -EFAULT; | ||
1329 | if (put_user(len, oldlenp)) | ||
1330 | return -EFAULT; | ||
1331 | } | ||
1332 | } | ||
1333 | |||
1334 | *valp = new; | ||
1335 | |||
1336 | cnf = table->extra1; | ||
1337 | net = table->extra2; | ||
1338 | i = (int *)table->data - cnf->data; | ||
1339 | |||
1340 | set_bit(i, cnf->state); | ||
1341 | |||
1342 | if (cnf == net->ipv4.devconf_dflt) | ||
1343 | devinet_copy_dflt_conf(net, i); | ||
1344 | |||
1345 | return 1; | ||
1346 | } | ||
1347 | |||
1348 | static int devinet_sysctl_forward(ctl_table *ctl, int write, | 1315 | static int devinet_sysctl_forward(ctl_table *ctl, int write, |
1349 | void __user *buffer, | 1316 | void __user *buffer, |
1350 | size_t *lenp, loff_t *ppos) | 1317 | size_t *lenp, loff_t *ppos) |
1351 | { | 1318 | { |
1352 | int *valp = ctl->data; | 1319 | int *valp = ctl->data; |
1353 | int val = *valp; | 1320 | int val = *valp; |
1321 | loff_t pos = *ppos; | ||
1354 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); | 1322 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1355 | 1323 | ||
1356 | if (write && *valp != val) { | 1324 | if (write && *valp != val) { |
1357 | struct net *net = ctl->extra2; | 1325 | struct net *net = ctl->extra2; |
1358 | 1326 | ||
1359 | if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { | 1327 | if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { |
1360 | if (!rtnl_trylock()) | 1328 | if (!rtnl_trylock()) { |
1329 | /* Restore the original values before restarting */ | ||
1330 | *valp = val; | ||
1331 | *ppos = pos; | ||
1361 | return restart_syscall(); | 1332 | return restart_syscall(); |
1333 | } | ||
1362 | if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { | 1334 | if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { |
1363 | inet_forward_change(net); | 1335 | inet_forward_change(net); |
1364 | } else if (*valp) { | 1336 | } else if (*valp) { |
@@ -1390,57 +1362,37 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, | |||
1390 | return ret; | 1362 | return ret; |
1391 | } | 1363 | } |
1392 | 1364 | ||
1393 | int ipv4_doint_and_flush_strategy(ctl_table *table, | 1365 | #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ |
1394 | void __user *oldval, size_t __user *oldlenp, | ||
1395 | void __user *newval, size_t newlen) | ||
1396 | { | ||
1397 | int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); | ||
1398 | struct net *net = table->extra2; | ||
1399 | |||
1400 | if (ret == 1) | ||
1401 | rt_cache_flush(net, 0); | ||
1402 | |||
1403 | return ret; | ||
1404 | } | ||
1405 | |||
1406 | |||
1407 | #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \ | ||
1408 | { \ | 1366 | { \ |
1409 | .ctl_name = NET_IPV4_CONF_ ## attr, \ | ||
1410 | .procname = name, \ | 1367 | .procname = name, \ |
1411 | .data = ipv4_devconf.data + \ | 1368 | .data = ipv4_devconf.data + \ |
1412 | NET_IPV4_CONF_ ## attr - 1, \ | 1369 | IPV4_DEVCONF_ ## attr - 1, \ |
1413 | .maxlen = sizeof(int), \ | 1370 | .maxlen = sizeof(int), \ |
1414 | .mode = mval, \ | 1371 | .mode = mval, \ |
1415 | .proc_handler = proc, \ | 1372 | .proc_handler = proc, \ |
1416 | .strategy = sysctl, \ | ||
1417 | .extra1 = &ipv4_devconf, \ | 1373 | .extra1 = &ipv4_devconf, \ |
1418 | } | 1374 | } |
1419 | 1375 | ||
1420 | #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ | 1376 | #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ |
1421 | DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \ | 1377 | DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) |
1422 | devinet_conf_sysctl) | ||
1423 | 1378 | ||
1424 | #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ | 1379 | #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ |
1425 | DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \ | 1380 | DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) |
1426 | devinet_conf_sysctl) | ||
1427 | 1381 | ||
1428 | #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \ | 1382 | #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ |
1429 | DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl) | 1383 | DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) |
1430 | 1384 | ||
1431 | #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ | 1385 | #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ |
1432 | DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \ | 1386 | DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) |
1433 | ipv4_doint_and_flush_strategy) | ||
1434 | 1387 | ||
1435 | static struct devinet_sysctl_table { | 1388 | static struct devinet_sysctl_table { |
1436 | struct ctl_table_header *sysctl_header; | 1389 | struct ctl_table_header *sysctl_header; |
1437 | struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; | 1390 | struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; |
1438 | char *dev_name; | 1391 | char *dev_name; |
1439 | } devinet_sysctl = { | 1392 | } devinet_sysctl = { |
1440 | .devinet_vars = { | 1393 | .devinet_vars = { |
1441 | DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", | 1394 | DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", |
1442 | devinet_sysctl_forward, | 1395 | devinet_sysctl_forward), |
1443 | devinet_conf_sysctl), | ||
1444 | DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), | 1396 | DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), |
1445 | 1397 | ||
1446 | DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), | 1398 | DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), |
@@ -1450,6 +1402,8 @@ static struct devinet_sysctl_table { | |||
1450 | DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), | 1402 | DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), |
1451 | DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, | 1403 | DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, |
1452 | "accept_source_route"), | 1404 | "accept_source_route"), |
1405 | DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), | ||
1406 | DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), | ||
1453 | DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), | 1407 | DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), |
1454 | DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), | 1408 | DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), |
1455 | DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), | 1409 | DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), |
@@ -1460,6 +1414,7 @@ static struct devinet_sysctl_table { | |||
1460 | DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), | 1414 | DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), |
1461 | DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), | 1415 | DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), |
1462 | DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), | 1416 | DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), |
1417 | DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), | ||
1463 | 1418 | ||
1464 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), | 1419 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), |
1465 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), | 1420 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), |
@@ -1471,7 +1426,7 @@ static struct devinet_sysctl_table { | |||
1471 | }; | 1426 | }; |
1472 | 1427 | ||
1473 | static int __devinet_sysctl_register(struct net *net, char *dev_name, | 1428 | static int __devinet_sysctl_register(struct net *net, char *dev_name, |
1474 | int ctl_name, struct ipv4_devconf *p) | 1429 | struct ipv4_devconf *p) |
1475 | { | 1430 | { |
1476 | int i; | 1431 | int i; |
1477 | struct devinet_sysctl_table *t; | 1432 | struct devinet_sysctl_table *t; |
@@ -1479,9 +1434,9 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, | |||
1479 | #define DEVINET_CTL_PATH_DEV 3 | 1434 | #define DEVINET_CTL_PATH_DEV 3 |
1480 | 1435 | ||
1481 | struct ctl_path devinet_ctl_path[] = { | 1436 | struct ctl_path devinet_ctl_path[] = { |
1482 | { .procname = "net", .ctl_name = CTL_NET, }, | 1437 | { .procname = "net", }, |
1483 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 1438 | { .procname = "ipv4", }, |
1484 | { .procname = "conf", .ctl_name = NET_IPV4_CONF, }, | 1439 | { .procname = "conf", }, |
1485 | { /* to be set */ }, | 1440 | { /* to be set */ }, |
1486 | { }, | 1441 | { }, |
1487 | }; | 1442 | }; |
@@ -1506,7 +1461,6 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, | |||
1506 | goto free; | 1461 | goto free; |
1507 | 1462 | ||
1508 | devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; | 1463 | devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; |
1509 | devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name; | ||
1510 | 1464 | ||
1511 | t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, | 1465 | t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, |
1512 | t->devinet_vars); | 1466 | t->devinet_vars); |
@@ -1539,10 +1493,9 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) | |||
1539 | 1493 | ||
1540 | static void devinet_sysctl_register(struct in_device *idev) | 1494 | static void devinet_sysctl_register(struct in_device *idev) |
1541 | { | 1495 | { |
1542 | neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, | 1496 | neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); |
1543 | NET_IPV4_NEIGH, "ipv4", NULL, NULL); | ||
1544 | __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, | 1497 | __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, |
1545 | idev->dev->ifindex, &idev->cnf); | 1498 | &idev->cnf); |
1546 | } | 1499 | } |
1547 | 1500 | ||
1548 | static void devinet_sysctl_unregister(struct in_device *idev) | 1501 | static void devinet_sysctl_unregister(struct in_device *idev) |
@@ -1553,14 +1506,12 @@ static void devinet_sysctl_unregister(struct in_device *idev) | |||
1553 | 1506 | ||
1554 | static struct ctl_table ctl_forward_entry[] = { | 1507 | static struct ctl_table ctl_forward_entry[] = { |
1555 | { | 1508 | { |
1556 | .ctl_name = NET_IPV4_FORWARD, | ||
1557 | .procname = "ip_forward", | 1509 | .procname = "ip_forward", |
1558 | .data = &ipv4_devconf.data[ | 1510 | .data = &ipv4_devconf.data[ |
1559 | NET_IPV4_CONF_FORWARDING - 1], | 1511 | IPV4_DEVCONF_FORWARDING - 1], |
1560 | .maxlen = sizeof(int), | 1512 | .maxlen = sizeof(int), |
1561 | .mode = 0644, | 1513 | .mode = 0644, |
1562 | .proc_handler = devinet_sysctl_forward, | 1514 | .proc_handler = devinet_sysctl_forward, |
1563 | .strategy = devinet_conf_sysctl, | ||
1564 | .extra1 = &ipv4_devconf, | 1515 | .extra1 = &ipv4_devconf, |
1565 | .extra2 = &init_net, | 1516 | .extra2 = &init_net, |
1566 | }, | 1517 | }, |
@@ -1568,8 +1519,8 @@ static struct ctl_table ctl_forward_entry[] = { | |||
1568 | }; | 1519 | }; |
1569 | 1520 | ||
1570 | static __net_initdata struct ctl_path net_ipv4_path[] = { | 1521 | static __net_initdata struct ctl_path net_ipv4_path[] = { |
1571 | { .procname = "net", .ctl_name = CTL_NET, }, | 1522 | { .procname = "net", }, |
1572 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 1523 | { .procname = "ipv4", }, |
1573 | { }, | 1524 | { }, |
1574 | }; | 1525 | }; |
1575 | #endif | 1526 | #endif |
@@ -1587,7 +1538,7 @@ static __net_init int devinet_init_net(struct net *net) | |||
1587 | all = &ipv4_devconf; | 1538 | all = &ipv4_devconf; |
1588 | dflt = &ipv4_devconf_dflt; | 1539 | dflt = &ipv4_devconf_dflt; |
1589 | 1540 | ||
1590 | if (net != &init_net) { | 1541 | if (!net_eq(net, &init_net)) { |
1591 | all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); | 1542 | all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); |
1592 | if (all == NULL) | 1543 | if (all == NULL) |
1593 | goto err_alloc_all; | 1544 | goto err_alloc_all; |
@@ -1601,20 +1552,18 @@ static __net_init int devinet_init_net(struct net *net) | |||
1601 | if (tbl == NULL) | 1552 | if (tbl == NULL) |
1602 | goto err_alloc_ctl; | 1553 | goto err_alloc_ctl; |
1603 | 1554 | ||
1604 | tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1]; | 1555 | tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; |
1605 | tbl[0].extra1 = all; | 1556 | tbl[0].extra1 = all; |
1606 | tbl[0].extra2 = net; | 1557 | tbl[0].extra2 = net; |
1607 | #endif | 1558 | #endif |
1608 | } | 1559 | } |
1609 | 1560 | ||
1610 | #ifdef CONFIG_SYSCTL | 1561 | #ifdef CONFIG_SYSCTL |
1611 | err = __devinet_sysctl_register(net, "all", | 1562 | err = __devinet_sysctl_register(net, "all", all); |
1612 | NET_PROTO_CONF_ALL, all); | ||
1613 | if (err < 0) | 1563 | if (err < 0) |
1614 | goto err_reg_all; | 1564 | goto err_reg_all; |
1615 | 1565 | ||
1616 | err = __devinet_sysctl_register(net, "default", | 1566 | err = __devinet_sysctl_register(net, "default", dflt); |
1617 | NET_PROTO_CONF_DEFAULT, dflt); | ||
1618 | if (err < 0) | 1567 | if (err < 0) |
1619 | goto err_reg_dflt; | 1568 | goto err_reg_dflt; |
1620 | 1569 | ||
@@ -1680,8 +1629,3 @@ void __init devinet_init(void) | |||
1680 | rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); | 1629 | rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); |
1681 | } | 1630 | } |
1682 | 1631 | ||
1683 | EXPORT_SYMBOL(in_dev_finish_destroy); | ||
1684 | EXPORT_SYMBOL(inet_select_addr); | ||
1685 | EXPORT_SYMBOL(inetdev_by_index); | ||
1686 | EXPORT_SYMBOL(register_inetaddr_notifier); | ||
1687 | EXPORT_SYMBOL(unregister_inetaddr_notifier); | ||
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 12f7287e902d..14ca1f1c3fb0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) | |||
422 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) | 422 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
423 | return; | 423 | return; |
424 | 424 | ||
425 | x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); | 425 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); |
426 | if (!x) | 426 | if (!x) |
427 | return; | 427 | return; |
428 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", | 428 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", |
@@ -530,7 +530,7 @@ static int esp_init_authenc(struct xfrm_state *x) | |||
530 | } | 530 | } |
531 | 531 | ||
532 | err = crypto_aead_setauthsize( | 532 | err = crypto_aead_setauthsize( |
533 | aead, aalg_desc->uinfo.auth.icv_truncbits / 8); | 533 | aead, x->aalg->alg_trunc_len / 8); |
534 | if (err) | 534 | if (err) |
535 | goto free_key; | 535 | goto free_key; |
536 | } | 536 | } |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index aa00398be80e..4f0ed458c883 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/skbuff.h> | 34 | #include <linux/skbuff.h> |
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/list.h> | 36 | #include <linux/list.h> |
37 | #include <linux/slab.h> | ||
37 | 38 | ||
38 | #include <net/ip.h> | 39 | #include <net/ip.h> |
39 | #include <net/protocol.h> | 40 | #include <net/protocol.h> |
@@ -125,7 +126,7 @@ void fib_select_default(struct net *net, | |||
125 | #endif | 126 | #endif |
126 | tb = fib_get_table(net, table); | 127 | tb = fib_get_table(net, table); |
127 | if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 128 | if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
128 | tb->tb_select_default(tb, flp, res); | 129 | fib_table_select_default(tb, flp, res); |
129 | } | 130 | } |
130 | 131 | ||
131 | static void fib_flush(struct net *net) | 132 | static void fib_flush(struct net *net) |
@@ -139,7 +140,7 @@ static void fib_flush(struct net *net) | |||
139 | for (h = 0; h < FIB_TABLE_HASHSZ; h++) { | 140 | for (h = 0; h < FIB_TABLE_HASHSZ; h++) { |
140 | head = &net->ipv4.fib_table_hash[h]; | 141 | head = &net->ipv4.fib_table_hash[h]; |
141 | hlist_for_each_entry(tb, node, head, tb_hlist) | 142 | hlist_for_each_entry(tb, node, head, tb_hlist) |
142 | flushed += tb->tb_flush(tb); | 143 | flushed += fib_table_flush(tb); |
143 | } | 144 | } |
144 | 145 | ||
145 | if (flushed) | 146 | if (flushed) |
@@ -162,7 +163,7 @@ struct net_device * ip_dev_find(struct net *net, __be32 addr) | |||
162 | #endif | 163 | #endif |
163 | 164 | ||
164 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | 165 | local_table = fib_get_table(net, RT_TABLE_LOCAL); |
165 | if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) | 166 | if (!local_table || fib_table_lookup(local_table, &fl, &res)) |
166 | return NULL; | 167 | return NULL; |
167 | if (res.type != RTN_LOCAL) | 168 | if (res.type != RTN_LOCAL) |
168 | goto out; | 169 | goto out; |
@@ -200,7 +201,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
200 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | 201 | local_table = fib_get_table(net, RT_TABLE_LOCAL); |
201 | if (local_table) { | 202 | if (local_table) { |
202 | ret = RTN_UNICAST; | 203 | ret = RTN_UNICAST; |
203 | if (!local_table->tb_lookup(local_table, &fl, &res)) { | 204 | if (!fib_table_lookup(local_table, &fl, &res)) { |
204 | if (!dev || dev == res.fi->fib_dev) | 205 | if (!dev || dev == res.fi->fib_dev) |
205 | ret = res.type; | 206 | ret = res.type; |
206 | fib_res_put(&res); | 207 | fib_res_put(&res); |
@@ -241,16 +242,19 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
241 | .iif = oif }; | 242 | .iif = oif }; |
242 | 243 | ||
243 | struct fib_result res; | 244 | struct fib_result res; |
244 | int no_addr, rpf; | 245 | int no_addr, rpf, accept_local; |
245 | int ret; | 246 | int ret; |
246 | struct net *net; | 247 | struct net *net; |
247 | 248 | ||
248 | no_addr = rpf = 0; | 249 | no_addr = rpf = accept_local = 0; |
249 | rcu_read_lock(); | 250 | rcu_read_lock(); |
250 | in_dev = __in_dev_get_rcu(dev); | 251 | in_dev = __in_dev_get_rcu(dev); |
251 | if (in_dev) { | 252 | if (in_dev) { |
252 | no_addr = in_dev->ifa_list == NULL; | 253 | no_addr = in_dev->ifa_list == NULL; |
253 | rpf = IN_DEV_RPFILTER(in_dev); | 254 | rpf = IN_DEV_RPFILTER(in_dev); |
255 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); | ||
256 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | ||
257 | fl.mark = 0; | ||
254 | } | 258 | } |
255 | rcu_read_unlock(); | 259 | rcu_read_unlock(); |
256 | 260 | ||
@@ -260,8 +264,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
260 | net = dev_net(dev); | 264 | net = dev_net(dev); |
261 | if (fib_lookup(net, &fl, &res)) | 265 | if (fib_lookup(net, &fl, &res)) |
262 | goto last_resort; | 266 | goto last_resort; |
263 | if (res.type != RTN_UNICAST) | 267 | if (res.type != RTN_UNICAST) { |
264 | goto e_inval_res; | 268 | if (res.type != RTN_LOCAL || !accept_local) |
269 | goto e_inval_res; | ||
270 | } | ||
265 | *spec_dst = FIB_RES_PREFSRC(res); | 271 | *spec_dst = FIB_RES_PREFSRC(res); |
266 | fib_combine_itag(itag, &res); | 272 | fib_combine_itag(itag, &res); |
267 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 273 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -476,13 +482,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
476 | if (cmd == SIOCDELRT) { | 482 | if (cmd == SIOCDELRT) { |
477 | tb = fib_get_table(net, cfg.fc_table); | 483 | tb = fib_get_table(net, cfg.fc_table); |
478 | if (tb) | 484 | if (tb) |
479 | err = tb->tb_delete(tb, &cfg); | 485 | err = fib_table_delete(tb, &cfg); |
480 | else | 486 | else |
481 | err = -ESRCH; | 487 | err = -ESRCH; |
482 | } else { | 488 | } else { |
483 | tb = fib_new_table(net, cfg.fc_table); | 489 | tb = fib_new_table(net, cfg.fc_table); |
484 | if (tb) | 490 | if (tb) |
485 | err = tb->tb_insert(tb, &cfg); | 491 | err = fib_table_insert(tb, &cfg); |
486 | else | 492 | else |
487 | err = -ENOBUFS; | 493 | err = -ENOBUFS; |
488 | } | 494 | } |
@@ -597,7 +603,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar | |||
597 | goto errout; | 603 | goto errout; |
598 | } | 604 | } |
599 | 605 | ||
600 | err = tb->tb_delete(tb, &cfg); | 606 | err = fib_table_delete(tb, &cfg); |
601 | errout: | 607 | errout: |
602 | return err; | 608 | return err; |
603 | } | 609 | } |
@@ -619,7 +625,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar | |||
619 | goto errout; | 625 | goto errout; |
620 | } | 626 | } |
621 | 627 | ||
622 | err = tb->tb_insert(tb, &cfg); | 628 | err = fib_table_insert(tb, &cfg); |
623 | errout: | 629 | errout: |
624 | return err; | 630 | return err; |
625 | } | 631 | } |
@@ -650,7 +656,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | |||
650 | if (dumped) | 656 | if (dumped) |
651 | memset(&cb->args[2], 0, sizeof(cb->args) - | 657 | memset(&cb->args[2], 0, sizeof(cb->args) - |
652 | 2 * sizeof(cb->args[0])); | 658 | 2 * sizeof(cb->args[0])); |
653 | if (tb->tb_dump(tb, skb, cb) < 0) | 659 | if (fib_table_dump(tb, skb, cb) < 0) |
654 | goto out; | 660 | goto out; |
655 | dumped = 1; | 661 | dumped = 1; |
656 | next: | 662 | next: |
@@ -704,9 +710,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad | |||
704 | cfg.fc_scope = RT_SCOPE_HOST; | 710 | cfg.fc_scope = RT_SCOPE_HOST; |
705 | 711 | ||
706 | if (cmd == RTM_NEWROUTE) | 712 | if (cmd == RTM_NEWROUTE) |
707 | tb->tb_insert(tb, &cfg); | 713 | fib_table_insert(tb, &cfg); |
708 | else | 714 | else |
709 | tb->tb_delete(tb, &cfg); | 715 | fib_table_delete(tb, &cfg); |
710 | } | 716 | } |
711 | 717 | ||
712 | void fib_add_ifaddr(struct in_ifaddr *ifa) | 718 | void fib_add_ifaddr(struct in_ifaddr *ifa) |
@@ -835,7 +841,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) | |||
835 | local_bh_disable(); | 841 | local_bh_disable(); |
836 | 842 | ||
837 | frn->tb_id = tb->tb_id; | 843 | frn->tb_id = tb->tb_id; |
838 | frn->err = tb->tb_lookup(tb, &fl, &res); | 844 | frn->err = fib_table_lookup(tb, &fl, &res); |
839 | 845 | ||
840 | if (!frn->err) { | 846 | if (!frn->err) { |
841 | frn->prefixlen = res.prefixlen; | 847 | frn->prefixlen = res.prefixlen; |
@@ -878,7 +884,7 @@ static void nl_fib_input(struct sk_buff *skb) | |||
878 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); | 884 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); |
879 | } | 885 | } |
880 | 886 | ||
881 | static int nl_fib_lookup_init(struct net *net) | 887 | static int __net_init nl_fib_lookup_init(struct net *net) |
882 | { | 888 | { |
883 | struct sock *sk; | 889 | struct sock *sk; |
884 | sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, | 890 | sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, |
@@ -895,11 +901,11 @@ static void nl_fib_lookup_exit(struct net *net) | |||
895 | net->ipv4.fibnl = NULL; | 901 | net->ipv4.fibnl = NULL; |
896 | } | 902 | } |
897 | 903 | ||
898 | static void fib_disable_ip(struct net_device *dev, int force) | 904 | static void fib_disable_ip(struct net_device *dev, int force, int delay) |
899 | { | 905 | { |
900 | if (fib_sync_down_dev(dev, force)) | 906 | if (fib_sync_down_dev(dev, force)) |
901 | fib_flush(dev_net(dev)); | 907 | fib_flush(dev_net(dev)); |
902 | rt_cache_flush(dev_net(dev), 0); | 908 | rt_cache_flush(dev_net(dev), delay); |
903 | arp_ifdown(dev); | 909 | arp_ifdown(dev); |
904 | } | 910 | } |
905 | 911 | ||
@@ -922,7 +928,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
922 | /* Last address was deleted from this interface. | 928 | /* Last address was deleted from this interface. |
923 | Disable IP. | 929 | Disable IP. |
924 | */ | 930 | */ |
925 | fib_disable_ip(dev, 1); | 931 | fib_disable_ip(dev, 1, 0); |
926 | } else { | 932 | } else { |
927 | rt_cache_flush(dev_net(dev), -1); | 933 | rt_cache_flush(dev_net(dev), -1); |
928 | } | 934 | } |
@@ -937,7 +943,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
937 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 943 | struct in_device *in_dev = __in_dev_get_rtnl(dev); |
938 | 944 | ||
939 | if (event == NETDEV_UNREGISTER) { | 945 | if (event == NETDEV_UNREGISTER) { |
940 | fib_disable_ip(dev, 2); | 946 | fib_disable_ip(dev, 2, -1); |
941 | return NOTIFY_DONE; | 947 | return NOTIFY_DONE; |
942 | } | 948 | } |
943 | 949 | ||
@@ -955,12 +961,15 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
955 | rt_cache_flush(dev_net(dev), -1); | 961 | rt_cache_flush(dev_net(dev), -1); |
956 | break; | 962 | break; |
957 | case NETDEV_DOWN: | 963 | case NETDEV_DOWN: |
958 | fib_disable_ip(dev, 0); | 964 | fib_disable_ip(dev, 0, 0); |
959 | break; | 965 | break; |
960 | case NETDEV_CHANGEMTU: | 966 | case NETDEV_CHANGEMTU: |
961 | case NETDEV_CHANGE: | 967 | case NETDEV_CHANGE: |
962 | rt_cache_flush(dev_net(dev), 0); | 968 | rt_cache_flush(dev_net(dev), 0); |
963 | break; | 969 | break; |
970 | case NETDEV_UNREGISTER_BATCH: | ||
971 | rt_cache_flush_batch(); | ||
972 | break; | ||
964 | } | 973 | } |
965 | return NOTIFY_DONE; | 974 | return NOTIFY_DONE; |
966 | } | 975 | } |
@@ -996,7 +1005,7 @@ fail: | |||
996 | return err; | 1005 | return err; |
997 | } | 1006 | } |
998 | 1007 | ||
999 | static void __net_exit ip_fib_net_exit(struct net *net) | 1008 | static void ip_fib_net_exit(struct net *net) |
1000 | { | 1009 | { |
1001 | unsigned int i; | 1010 | unsigned int i; |
1002 | 1011 | ||
@@ -1012,7 +1021,7 @@ static void __net_exit ip_fib_net_exit(struct net *net) | |||
1012 | head = &net->ipv4.fib_table_hash[i]; | 1021 | head = &net->ipv4.fib_table_hash[i]; |
1013 | hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { | 1022 | hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { |
1014 | hlist_del(node); | 1023 | hlist_del(node); |
1015 | tb->tb_flush(tb); | 1024 | fib_table_flush(tb); |
1016 | kfree(tb); | 1025 | kfree(tb); |
1017 | } | 1026 | } |
1018 | } | 1027 | } |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index ecd39454235c..4ed7e0dea1bc 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/skbuff.h> | 32 | #include <linux/skbuff.h> |
33 | #include <linux/netlink.h> | 33 | #include <linux/netlink.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/slab.h> | ||
35 | 36 | ||
36 | #include <net/net_namespace.h> | 37 | #include <net/net_namespace.h> |
37 | #include <net/ip.h> | 38 | #include <net/ip.h> |
@@ -242,8 +243,8 @@ fn_new_zone(struct fn_hash *table, int z) | |||
242 | return fz; | 243 | return fz; |
243 | } | 244 | } |
244 | 245 | ||
245 | static int | 246 | int fib_table_lookup(struct fib_table *tb, |
246 | fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | 247 | const struct flowi *flp, struct fib_result *res) |
247 | { | 248 | { |
248 | int err; | 249 | int err; |
249 | struct fn_zone *fz; | 250 | struct fn_zone *fz; |
@@ -274,8 +275,8 @@ out: | |||
274 | return err; | 275 | return err; |
275 | } | 276 | } |
276 | 277 | ||
277 | static void | 278 | void fib_table_select_default(struct fib_table *tb, |
278 | fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | 279 | const struct flowi *flp, struct fib_result *res) |
279 | { | 280 | { |
280 | int order, last_idx; | 281 | int order, last_idx; |
281 | struct hlist_node *node; | 282 | struct hlist_node *node; |
@@ -366,7 +367,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) | |||
366 | return NULL; | 367 | return NULL; |
367 | } | 368 | } |
368 | 369 | ||
369 | static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) | 370 | int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) |
370 | { | 371 | { |
371 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | 372 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; |
372 | struct fib_node *new_f = NULL; | 373 | struct fib_node *new_f = NULL; |
@@ -544,8 +545,7 @@ out: | |||
544 | return err; | 545 | return err; |
545 | } | 546 | } |
546 | 547 | ||
547 | 548 | int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |
548 | static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) | ||
549 | { | 549 | { |
550 | struct fn_hash *table = (struct fn_hash *)tb->tb_data; | 550 | struct fn_hash *table = (struct fn_hash *)tb->tb_data; |
551 | struct fib_node *f; | 551 | struct fib_node *f; |
@@ -662,7 +662,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx) | |||
662 | return found; | 662 | return found; |
663 | } | 663 | } |
664 | 664 | ||
665 | static int fn_hash_flush(struct fib_table *tb) | 665 | int fib_table_flush(struct fib_table *tb) |
666 | { | 666 | { |
667 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | 667 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; |
668 | struct fn_zone *fz; | 668 | struct fn_zone *fz; |
@@ -743,7 +743,8 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, | |||
743 | return skb->len; | 743 | return skb->len; |
744 | } | 744 | } |
745 | 745 | ||
746 | static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) | 746 | int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, |
747 | struct netlink_callback *cb) | ||
747 | { | 748 | { |
748 | int m, s_m; | 749 | int m, s_m; |
749 | struct fn_zone *fz; | 750 | struct fn_zone *fz; |
@@ -787,12 +788,7 @@ struct fib_table *fib_hash_table(u32 id) | |||
787 | 788 | ||
788 | tb->tb_id = id; | 789 | tb->tb_id = id; |
789 | tb->tb_default = -1; | 790 | tb->tb_default = -1; |
790 | tb->tb_lookup = fn_hash_lookup; | 791 | |
791 | tb->tb_insert = fn_hash_insert; | ||
792 | tb->tb_delete = fn_hash_delete; | ||
793 | tb->tb_flush = fn_hash_flush; | ||
794 | tb->tb_select_default = fn_hash_select_default; | ||
795 | tb->tb_dump = fn_hash_dump; | ||
796 | memset(tb->tb_data, 0, sizeof(struct fn_hash)); | 792 | memset(tb->tb_data, 0, sizeof(struct fn_hash)); |
797 | return tb; | 793 | return tb; |
798 | } | 794 | } |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 92d9d97ec5e3..ca2d07b1c706 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -94,7 +94,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, | |||
94 | if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) | 94 | if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) |
95 | goto errout; | 95 | goto errout; |
96 | 96 | ||
97 | err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); | 97 | err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); |
98 | if (err > 0) | 98 | if (err > 0) |
99 | err = -EAGAIN; | 99 | err = -EAGAIN; |
100 | errout: | 100 | errout: |
@@ -284,7 +284,7 @@ static int fib_default_rules_init(struct fib_rules_ops *ops) | |||
284 | { | 284 | { |
285 | int err; | 285 | int err; |
286 | 286 | ||
287 | err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT); | 287 | err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0); |
288 | if (err < 0) | 288 | if (err < 0) |
289 | return err; | 289 | return err; |
290 | err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); | 290 | err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); |
@@ -301,13 +301,9 @@ int __net_init fib4_rules_init(struct net *net) | |||
301 | int err; | 301 | int err; |
302 | struct fib_rules_ops *ops; | 302 | struct fib_rules_ops *ops; |
303 | 303 | ||
304 | ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL); | 304 | ops = fib_rules_register(&fib4_rules_ops_template, net); |
305 | if (ops == NULL) | 305 | if (IS_ERR(ops)) |
306 | return -ENOMEM; | 306 | return PTR_ERR(ops); |
307 | INIT_LIST_HEAD(&ops->rules_list); | ||
308 | ops->fro_net = net; | ||
309 | |||
310 | fib_rules_register(ops); | ||
311 | 307 | ||
312 | err = fib_default_rules_init(ops); | 308 | err = fib_default_rules_init(ops); |
313 | if (err < 0) | 309 | if (err < 0) |
@@ -318,12 +314,10 @@ int __net_init fib4_rules_init(struct net *net) | |||
318 | fail: | 314 | fail: |
319 | /* also cleans all rules already added */ | 315 | /* also cleans all rules already added */ |
320 | fib_rules_unregister(ops); | 316 | fib_rules_unregister(ops); |
321 | kfree(ops); | ||
322 | return err; | 317 | return err; |
323 | } | 318 | } |
324 | 319 | ||
325 | void __net_exit fib4_rules_exit(struct net *net) | 320 | void __net_exit fib4_rules_exit(struct net *net) |
326 | { | 321 | { |
327 | fib_rules_unregister(net->ipv4.rules_ops); | 322 | fib_rules_unregister(net->ipv4.rules_ops); |
328 | kfree(net->ipv4.rules_ops); | ||
329 | } | 323 | } |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9b096d6ff3f2..20f09c5b31e8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/proc_fs.h> | 32 | #include <linux/proc_fs.h> |
33 | #include <linux/skbuff.h> | 33 | #include <linux/skbuff.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/slab.h> | ||
35 | 36 | ||
36 | #include <net/arp.h> | 37 | #include <net/arp.h> |
37 | #include <net/ip.h> | 38 | #include <net/ip.h> |
@@ -62,8 +63,8 @@ static DEFINE_SPINLOCK(fib_multipath_lock); | |||
62 | #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ | 63 | #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ |
63 | for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) | 64 | for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) |
64 | 65 | ||
65 | #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ | 66 | #define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ |
66 | for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) | 67 | for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) |
67 | 68 | ||
68 | #else /* CONFIG_IP_ROUTE_MULTIPATH */ | 69 | #else /* CONFIG_IP_ROUTE_MULTIPATH */ |
69 | 70 | ||
@@ -72,7 +73,7 @@ for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, | |||
72 | #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ | 73 | #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ |
73 | for (nhsel=0; nhsel < 1; nhsel++) | 74 | for (nhsel=0; nhsel < 1; nhsel++) |
74 | 75 | ||
75 | #define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ | 76 | #define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ |
76 | for (nhsel=0; nhsel < 1; nhsel++) | 77 | for (nhsel=0; nhsel < 1; nhsel++) |
77 | 78 | ||
78 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ | 79 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ |
@@ -145,9 +146,9 @@ void free_fib_info(struct fib_info *fi) | |||
145 | return; | 146 | return; |
146 | } | 147 | } |
147 | change_nexthops(fi) { | 148 | change_nexthops(fi) { |
148 | if (nh->nh_dev) | 149 | if (nexthop_nh->nh_dev) |
149 | dev_put(nh->nh_dev); | 150 | dev_put(nexthop_nh->nh_dev); |
150 | nh->nh_dev = NULL; | 151 | nexthop_nh->nh_dev = NULL; |
151 | } endfor_nexthops(fi); | 152 | } endfor_nexthops(fi); |
152 | fib_info_cnt--; | 153 | fib_info_cnt--; |
153 | release_net(fi->fib_net); | 154 | release_net(fi->fib_net); |
@@ -162,9 +163,9 @@ void fib_release_info(struct fib_info *fi) | |||
162 | if (fi->fib_prefsrc) | 163 | if (fi->fib_prefsrc) |
163 | hlist_del(&fi->fib_lhash); | 164 | hlist_del(&fi->fib_lhash); |
164 | change_nexthops(fi) { | 165 | change_nexthops(fi) { |
165 | if (!nh->nh_dev) | 166 | if (!nexthop_nh->nh_dev) |
166 | continue; | 167 | continue; |
167 | hlist_del(&nh->nh_hash); | 168 | hlist_del(&nexthop_nh->nh_hash); |
168 | } endfor_nexthops(fi) | 169 | } endfor_nexthops(fi) |
169 | fi->fib_dead = 1; | 170 | fi->fib_dead = 1; |
170 | fib_info_put(fi); | 171 | fib_info_put(fi); |
@@ -228,7 +229,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) | |||
228 | head = &fib_info_hash[hash]; | 229 | head = &fib_info_hash[hash]; |
229 | 230 | ||
230 | hlist_for_each_entry(fi, node, head, fib_hash) { | 231 | hlist_for_each_entry(fi, node, head, fib_hash) { |
231 | if (fi->fib_net != nfi->fib_net) | 232 | if (!net_eq(fi->fib_net, nfi->fib_net)) |
232 | continue; | 233 | continue; |
233 | if (fi->fib_nhs != nfi->fib_nhs) | 234 | if (fi->fib_nhs != nfi->fib_nhs) |
234 | continue; | 235 | continue; |
@@ -395,19 +396,20 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
395 | if (!rtnh_ok(rtnh, remaining)) | 396 | if (!rtnh_ok(rtnh, remaining)) |
396 | return -EINVAL; | 397 | return -EINVAL; |
397 | 398 | ||
398 | nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; | 399 | nexthop_nh->nh_flags = |
399 | nh->nh_oif = rtnh->rtnh_ifindex; | 400 | (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; |
400 | nh->nh_weight = rtnh->rtnh_hops + 1; | 401 | nexthop_nh->nh_oif = rtnh->rtnh_ifindex; |
402 | nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; | ||
401 | 403 | ||
402 | attrlen = rtnh_attrlen(rtnh); | 404 | attrlen = rtnh_attrlen(rtnh); |
403 | if (attrlen > 0) { | 405 | if (attrlen > 0) { |
404 | struct nlattr *nla, *attrs = rtnh_attrs(rtnh); | 406 | struct nlattr *nla, *attrs = rtnh_attrs(rtnh); |
405 | 407 | ||
406 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 408 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
407 | nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 409 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
408 | #ifdef CONFIG_NET_CLS_ROUTE | 410 | #ifdef CONFIG_NET_CLS_ROUTE |
409 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 411 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
410 | nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 412 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
411 | #endif | 413 | #endif |
412 | } | 414 | } |
413 | 415 | ||
@@ -527,10 +529,6 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
527 | if (nh->nh_gw) { | 529 | if (nh->nh_gw) { |
528 | struct fib_result res; | 530 | struct fib_result res; |
529 | 531 | ||
530 | #ifdef CONFIG_IP_ROUTE_PERVASIVE | ||
531 | if (nh->nh_flags&RTNH_F_PERVASIVE) | ||
532 | return 0; | ||
533 | #endif | ||
534 | if (nh->nh_flags&RTNH_F_ONLINK) { | 532 | if (nh->nh_flags&RTNH_F_ONLINK) { |
535 | struct net_device *dev; | 533 | struct net_device *dev; |
536 | 534 | ||
@@ -738,7 +736,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
738 | 736 | ||
739 | fi->fib_nhs = nhs; | 737 | fi->fib_nhs = nhs; |
740 | change_nexthops(fi) { | 738 | change_nexthops(fi) { |
741 | nh->nh_parent = fi; | 739 | nexthop_nh->nh_parent = fi; |
742 | } endfor_nexthops(fi) | 740 | } endfor_nexthops(fi) |
743 | 741 | ||
744 | if (cfg->fc_mx) { | 742 | if (cfg->fc_mx) { |
@@ -808,7 +806,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
808 | goto failure; | 806 | goto failure; |
809 | } else { | 807 | } else { |
810 | change_nexthops(fi) { | 808 | change_nexthops(fi) { |
811 | if ((err = fib_check_nh(cfg, fi, nh)) != 0) | 809 | if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) |
812 | goto failure; | 810 | goto failure; |
813 | } endfor_nexthops(fi) | 811 | } endfor_nexthops(fi) |
814 | } | 812 | } |
@@ -843,11 +841,11 @@ link_it: | |||
843 | struct hlist_head *head; | 841 | struct hlist_head *head; |
844 | unsigned int hash; | 842 | unsigned int hash; |
845 | 843 | ||
846 | if (!nh->nh_dev) | 844 | if (!nexthop_nh->nh_dev) |
847 | continue; | 845 | continue; |
848 | hash = fib_devindex_hashfn(nh->nh_dev->ifindex); | 846 | hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); |
849 | head = &fib_info_devhash[hash]; | 847 | head = &fib_info_devhash[hash]; |
850 | hlist_add_head(&nh->nh_hash, head); | 848 | hlist_add_head(&nexthop_nh->nh_hash, head); |
851 | } endfor_nexthops(fi) | 849 | } endfor_nexthops(fi) |
852 | spin_unlock_bh(&fib_info_lock); | 850 | spin_unlock_bh(&fib_info_lock); |
853 | return fi; | 851 | return fi; |
@@ -1047,7 +1045,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) | |||
1047 | return 0; | 1045 | return 0; |
1048 | 1046 | ||
1049 | hlist_for_each_entry(fi, node, head, fib_lhash) { | 1047 | hlist_for_each_entry(fi, node, head, fib_lhash) { |
1050 | if (fi->fib_net != net) | 1048 | if (!net_eq(fi->fib_net, net)) |
1051 | continue; | 1049 | continue; |
1052 | if (fi->fib_prefsrc == local) { | 1050 | if (fi->fib_prefsrc == local) { |
1053 | fi->fib_flags |= RTNH_F_DEAD; | 1051 | fi->fib_flags |= RTNH_F_DEAD; |
@@ -1080,21 +1078,21 @@ int fib_sync_down_dev(struct net_device *dev, int force) | |||
1080 | prev_fi = fi; | 1078 | prev_fi = fi; |
1081 | dead = 0; | 1079 | dead = 0; |
1082 | change_nexthops(fi) { | 1080 | change_nexthops(fi) { |
1083 | if (nh->nh_flags&RTNH_F_DEAD) | 1081 | if (nexthop_nh->nh_flags&RTNH_F_DEAD) |
1084 | dead++; | 1082 | dead++; |
1085 | else if (nh->nh_dev == dev && | 1083 | else if (nexthop_nh->nh_dev == dev && |
1086 | nh->nh_scope != scope) { | 1084 | nexthop_nh->nh_scope != scope) { |
1087 | nh->nh_flags |= RTNH_F_DEAD; | 1085 | nexthop_nh->nh_flags |= RTNH_F_DEAD; |
1088 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1086 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1089 | spin_lock_bh(&fib_multipath_lock); | 1087 | spin_lock_bh(&fib_multipath_lock); |
1090 | fi->fib_power -= nh->nh_power; | 1088 | fi->fib_power -= nexthop_nh->nh_power; |
1091 | nh->nh_power = 0; | 1089 | nexthop_nh->nh_power = 0; |
1092 | spin_unlock_bh(&fib_multipath_lock); | 1090 | spin_unlock_bh(&fib_multipath_lock); |
1093 | #endif | 1091 | #endif |
1094 | dead++; | 1092 | dead++; |
1095 | } | 1093 | } |
1096 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1094 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1097 | if (force > 1 && nh->nh_dev == dev) { | 1095 | if (force > 1 && nexthop_nh->nh_dev == dev) { |
1098 | dead = fi->fib_nhs; | 1096 | dead = fi->fib_nhs; |
1099 | break; | 1097 | break; |
1100 | } | 1098 | } |
@@ -1144,18 +1142,20 @@ int fib_sync_up(struct net_device *dev) | |||
1144 | prev_fi = fi; | 1142 | prev_fi = fi; |
1145 | alive = 0; | 1143 | alive = 0; |
1146 | change_nexthops(fi) { | 1144 | change_nexthops(fi) { |
1147 | if (!(nh->nh_flags&RTNH_F_DEAD)) { | 1145 | if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { |
1148 | alive++; | 1146 | alive++; |
1149 | continue; | 1147 | continue; |
1150 | } | 1148 | } |
1151 | if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) | 1149 | if (nexthop_nh->nh_dev == NULL || |
1150 | !(nexthop_nh->nh_dev->flags&IFF_UP)) | ||
1152 | continue; | 1151 | continue; |
1153 | if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) | 1152 | if (nexthop_nh->nh_dev != dev || |
1153 | !__in_dev_get_rtnl(dev)) | ||
1154 | continue; | 1154 | continue; |
1155 | alive++; | 1155 | alive++; |
1156 | spin_lock_bh(&fib_multipath_lock); | 1156 | spin_lock_bh(&fib_multipath_lock); |
1157 | nh->nh_power = 0; | 1157 | nexthop_nh->nh_power = 0; |
1158 | nh->nh_flags &= ~RTNH_F_DEAD; | 1158 | nexthop_nh->nh_flags &= ~RTNH_F_DEAD; |
1159 | spin_unlock_bh(&fib_multipath_lock); | 1159 | spin_unlock_bh(&fib_multipath_lock); |
1160 | } endfor_nexthops(fi) | 1160 | } endfor_nexthops(fi) |
1161 | 1161 | ||
@@ -1182,9 +1182,9 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | |||
1182 | if (fi->fib_power <= 0) { | 1182 | if (fi->fib_power <= 0) { |
1183 | int power = 0; | 1183 | int power = 0; |
1184 | change_nexthops(fi) { | 1184 | change_nexthops(fi) { |
1185 | if (!(nh->nh_flags&RTNH_F_DEAD)) { | 1185 | if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { |
1186 | power += nh->nh_weight; | 1186 | power += nexthop_nh->nh_weight; |
1187 | nh->nh_power = nh->nh_weight; | 1187 | nexthop_nh->nh_power = nexthop_nh->nh_weight; |
1188 | } | 1188 | } |
1189 | } endfor_nexthops(fi); | 1189 | } endfor_nexthops(fi); |
1190 | fi->fib_power = power; | 1190 | fi->fib_power = power; |
@@ -1204,9 +1204,10 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | |||
1204 | w = jiffies % fi->fib_power; | 1204 | w = jiffies % fi->fib_power; |
1205 | 1205 | ||
1206 | change_nexthops(fi) { | 1206 | change_nexthops(fi) { |
1207 | if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { | 1207 | if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && |
1208 | if ((w -= nh->nh_power) <= 0) { | 1208 | nexthop_nh->nh_power) { |
1209 | nh->nh_power--; | 1209 | if ((w -= nexthop_nh->nh_power) <= 0) { |
1210 | nexthop_nh->nh_power--; | ||
1210 | fi->fib_power--; | 1211 | fi->fib_power--; |
1211 | res->nh_sel = nhsel; | 1212 | res->nh_sel = nhsel; |
1212 | spin_unlock_bh(&fib_multipath_lock); | 1213 | spin_unlock_bh(&fib_multipath_lock); |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 291bdf50a21f..c98f115fb0fd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <linux/netlink.h> | 71 | #include <linux/netlink.h> |
72 | #include <linux/init.h> | 72 | #include <linux/init.h> |
73 | #include <linux/list.h> | 73 | #include <linux/list.h> |
74 | #include <linux/slab.h> | ||
74 | #include <net/net_namespace.h> | 75 | #include <net/net_namespace.h> |
75 | #include <net/ip.h> | 76 | #include <net/ip.h> |
76 | #include <net/protocol.h> | 77 | #include <net/protocol.h> |
@@ -208,7 +209,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | |||
208 | { | 209 | { |
209 | struct node *ret = tnode_get_child(tn, i); | 210 | struct node *ret = tnode_get_child(tn, i); |
210 | 211 | ||
211 | return rcu_dereference(ret); | 212 | return rcu_dereference_check(ret, |
213 | rcu_read_lock_held() || | ||
214 | lockdep_rtnl_is_held()); | ||
212 | } | 215 | } |
213 | 216 | ||
214 | static inline int tnode_child_length(const struct tnode *tn) | 217 | static inline int tnode_child_length(const struct tnode *tn) |
@@ -961,7 +964,9 @@ fib_find_node(struct trie *t, u32 key) | |||
961 | struct node *n; | 964 | struct node *n; |
962 | 965 | ||
963 | pos = 0; | 966 | pos = 0; |
964 | n = rcu_dereference(t->trie); | 967 | n = rcu_dereference_check(t->trie, |
968 | rcu_read_lock_held() || | ||
969 | lockdep_rtnl_is_held()); | ||
965 | 970 | ||
966 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 971 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
967 | tn = (struct tnode *) n; | 972 | tn = (struct tnode *) n; |
@@ -1174,7 +1179,7 @@ done: | |||
1174 | /* | 1179 | /* |
1175 | * Caller must hold RTNL. | 1180 | * Caller must hold RTNL. |
1176 | */ | 1181 | */ |
1177 | static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) | 1182 | int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) |
1178 | { | 1183 | { |
1179 | struct trie *t = (struct trie *) tb->tb_data; | 1184 | struct trie *t = (struct trie *) tb->tb_data; |
1180 | struct fib_alias *fa, *new_fa; | 1185 | struct fib_alias *fa, *new_fa; |
@@ -1373,8 +1378,8 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
1373 | return 1; | 1378 | return 1; |
1374 | } | 1379 | } |
1375 | 1380 | ||
1376 | static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, | 1381 | int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, |
1377 | struct fib_result *res) | 1382 | struct fib_result *res) |
1378 | { | 1383 | { |
1379 | struct trie *t = (struct trie *) tb->tb_data; | 1384 | struct trie *t = (struct trie *) tb->tb_data; |
1380 | int ret; | 1385 | int ret; |
@@ -1595,7 +1600,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) | |||
1595 | /* | 1600 | /* |
1596 | * Caller must hold RTNL. | 1601 | * Caller must hold RTNL. |
1597 | */ | 1602 | */ |
1598 | static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) | 1603 | int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) |
1599 | { | 1604 | { |
1600 | struct trie *t = (struct trie *) tb->tb_data; | 1605 | struct trie *t = (struct trie *) tb->tb_data; |
1601 | u32 key, mask; | 1606 | u32 key, mask; |
@@ -1786,7 +1791,7 @@ static struct leaf *trie_leafindex(struct trie *t, int index) | |||
1786 | /* | 1791 | /* |
1787 | * Caller must hold RTNL. | 1792 | * Caller must hold RTNL. |
1788 | */ | 1793 | */ |
1789 | static int fn_trie_flush(struct fib_table *tb) | 1794 | int fib_table_flush(struct fib_table *tb) |
1790 | { | 1795 | { |
1791 | struct trie *t = (struct trie *) tb->tb_data; | 1796 | struct trie *t = (struct trie *) tb->tb_data; |
1792 | struct leaf *l, *ll = NULL; | 1797 | struct leaf *l, *ll = NULL; |
@@ -1807,9 +1812,9 @@ static int fn_trie_flush(struct fib_table *tb) | |||
1807 | return found; | 1812 | return found; |
1808 | } | 1813 | } |
1809 | 1814 | ||
1810 | static void fn_trie_select_default(struct fib_table *tb, | 1815 | void fib_table_select_default(struct fib_table *tb, |
1811 | const struct flowi *flp, | 1816 | const struct flowi *flp, |
1812 | struct fib_result *res) | 1817 | struct fib_result *res) |
1813 | { | 1818 | { |
1814 | struct trie *t = (struct trie *) tb->tb_data; | 1819 | struct trie *t = (struct trie *) tb->tb_data; |
1815 | int order, last_idx; | 1820 | int order, last_idx; |
@@ -1952,8 +1957,8 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, | |||
1952 | return skb->len; | 1957 | return skb->len; |
1953 | } | 1958 | } |
1954 | 1959 | ||
1955 | static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, | 1960 | int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, |
1956 | struct netlink_callback *cb) | 1961 | struct netlink_callback *cb) |
1957 | { | 1962 | { |
1958 | struct leaf *l; | 1963 | struct leaf *l; |
1959 | struct trie *t = (struct trie *) tb->tb_data; | 1964 | struct trie *t = (struct trie *) tb->tb_data; |
@@ -2020,12 +2025,6 @@ struct fib_table *fib_hash_table(u32 id) | |||
2020 | 2025 | ||
2021 | tb->tb_id = id; | 2026 | tb->tb_id = id; |
2022 | tb->tb_default = -1; | 2027 | tb->tb_default = -1; |
2023 | tb->tb_lookup = fn_trie_lookup; | ||
2024 | tb->tb_insert = fn_trie_insert; | ||
2025 | tb->tb_delete = fn_trie_delete; | ||
2026 | tb->tb_flush = fn_trie_flush; | ||
2027 | tb->tb_select_default = fn_trie_select_default; | ||
2028 | tb->tb_dump = fn_trie_dump; | ||
2029 | 2028 | ||
2030 | t = (struct trie *) tb->tb_data; | 2029 | t = (struct trie *) tb->tb_data; |
2031 | memset(t, 0, sizeof(*t)); | 2030 | memset(t, 0, sizeof(*t)); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5bc13fe816d1..ac4dec132735 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -74,6 +74,7 @@ | |||
74 | #include <linux/netdevice.h> | 74 | #include <linux/netdevice.h> |
75 | #include <linux/string.h> | 75 | #include <linux/string.h> |
76 | #include <linux/netfilter_ipv4.h> | 76 | #include <linux/netfilter_ipv4.h> |
77 | #include <linux/slab.h> | ||
77 | #include <net/snmp.h> | 78 | #include <net/snmp.h> |
78 | #include <net/ip.h> | 79 | #include <net/ip.h> |
79 | #include <net/route.h> | 80 | #include <net/route.h> |
@@ -114,7 +115,7 @@ struct icmp_bxm { | |||
114 | /* An array of errno for error messages from dest unreach. */ | 115 | /* An array of errno for error messages from dest unreach. */ |
115 | /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ | 116 | /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ |
116 | 117 | ||
117 | struct icmp_err icmp_err_convert[] = { | 118 | const struct icmp_err icmp_err_convert[] = { |
118 | { | 119 | { |
119 | .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ | 120 | .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ |
120 | .fatal = 0, | 121 | .fatal = 0, |
@@ -501,15 +502,16 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
501 | if (!(rt->rt_flags & RTCF_LOCAL)) { | 502 | if (!(rt->rt_flags & RTCF_LOCAL)) { |
502 | struct net_device *dev = NULL; | 503 | struct net_device *dev = NULL; |
503 | 504 | ||
505 | rcu_read_lock(); | ||
504 | if (rt->fl.iif && | 506 | if (rt->fl.iif && |
505 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) | 507 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) |
506 | dev = dev_get_by_index(net, rt->fl.iif); | 508 | dev = dev_get_by_index_rcu(net, rt->fl.iif); |
507 | 509 | ||
508 | if (dev) { | 510 | if (dev) |
509 | saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 511 | saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
510 | dev_put(dev); | 512 | else |
511 | } else | ||
512 | saddr = 0; | 513 | saddr = 0; |
514 | rcu_read_unlock(); | ||
513 | } | 515 | } |
514 | 516 | ||
515 | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | | 517 | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | |
@@ -1165,6 +1167,10 @@ static int __net_init icmp_sk_init(struct net *net) | |||
1165 | sk->sk_sndbuf = | 1167 | sk->sk_sndbuf = |
1166 | (2 * ((64 * 1024) + sizeof(struct sk_buff))); | 1168 | (2 * ((64 * 1024) + sizeof(struct sk_buff))); |
1167 | 1169 | ||
1170 | /* | ||
1171 | * Speedup sock_wfree() | ||
1172 | */ | ||
1173 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
1168 | inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; | 1174 | inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; |
1169 | } | 1175 | } |
1170 | 1176 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d41e5de79a82..15d3eeda92f5 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -71,6 +71,7 @@ | |||
71 | */ | 71 | */ |
72 | 72 | ||
73 | #include <linux/module.h> | 73 | #include <linux/module.h> |
74 | #include <linux/slab.h> | ||
74 | #include <asm/uaccess.h> | 75 | #include <asm/uaccess.h> |
75 | #include <asm/system.h> | 76 | #include <asm/system.h> |
76 | #include <linux/types.h> | 77 | #include <linux/types.h> |
@@ -946,7 +947,6 @@ int igmp_rcv(struct sk_buff *skb) | |||
946 | break; | 947 | break; |
947 | case IGMP_HOST_MEMBERSHIP_REPORT: | 948 | case IGMP_HOST_MEMBERSHIP_REPORT: |
948 | case IGMPV2_HOST_MEMBERSHIP_REPORT: | 949 | case IGMPV2_HOST_MEMBERSHIP_REPORT: |
949 | case IGMPV3_HOST_MEMBERSHIP_REPORT: | ||
950 | /* Is it our report looped back? */ | 950 | /* Is it our report looped back? */ |
951 | if (skb_rtable(skb)->fl.iif == 0) | 951 | if (skb_rtable(skb)->fl.iif == 0) |
952 | break; | 952 | break; |
@@ -960,6 +960,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
960 | in_dev_put(in_dev); | 960 | in_dev_put(in_dev); |
961 | return pim_rcv_v1(skb); | 961 | return pim_rcv_v1(skb); |
962 | #endif | 962 | #endif |
963 | case IGMPV3_HOST_MEMBERSHIP_REPORT: | ||
963 | case IGMP_DVMRP: | 964 | case IGMP_DVMRP: |
964 | case IGMP_TRACE: | 965 | case IGMP_TRACE: |
965 | case IGMP_HOST_LEAVE_MESSAGE: | 966 | case IGMP_HOST_LEAVE_MESSAGE: |
@@ -1799,7 +1800,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) | |||
1799 | iml->next = inet->mc_list; | 1800 | iml->next = inet->mc_list; |
1800 | iml->sflist = NULL; | 1801 | iml->sflist = NULL; |
1801 | iml->sfmode = MCAST_EXCLUDE; | 1802 | iml->sfmode = MCAST_EXCLUDE; |
1802 | inet->mc_list = iml; | 1803 | rcu_assign_pointer(inet->mc_list, iml); |
1803 | ip_mc_inc_group(in_dev, addr); | 1804 | ip_mc_inc_group(in_dev, addr); |
1804 | err = 0; | 1805 | err = 0; |
1805 | done: | 1806 | done: |
@@ -1807,24 +1808,46 @@ done: | |||
1807 | return err; | 1808 | return err; |
1808 | } | 1809 | } |
1809 | 1810 | ||
1811 | static void ip_sf_socklist_reclaim(struct rcu_head *rp) | ||
1812 | { | ||
1813 | struct ip_sf_socklist *psf; | ||
1814 | |||
1815 | psf = container_of(rp, struct ip_sf_socklist, rcu); | ||
1816 | /* sk_omem_alloc should have been decreased by the caller*/ | ||
1817 | kfree(psf); | ||
1818 | } | ||
1819 | |||
1810 | static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, | 1820 | static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, |
1811 | struct in_device *in_dev) | 1821 | struct in_device *in_dev) |
1812 | { | 1822 | { |
1823 | struct ip_sf_socklist *psf = iml->sflist; | ||
1813 | int err; | 1824 | int err; |
1814 | 1825 | ||
1815 | if (iml->sflist == NULL) { | 1826 | if (psf == NULL) { |
1816 | /* any-source empty exclude case */ | 1827 | /* any-source empty exclude case */ |
1817 | return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, | 1828 | return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, |
1818 | iml->sfmode, 0, NULL, 0); | 1829 | iml->sfmode, 0, NULL, 0); |
1819 | } | 1830 | } |
1820 | err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, | 1831 | err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, |
1821 | iml->sfmode, iml->sflist->sl_count, | 1832 | iml->sfmode, psf->sl_count, psf->sl_addr, 0); |
1822 | iml->sflist->sl_addr, 0); | 1833 | rcu_assign_pointer(iml->sflist, NULL); |
1823 | sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); | 1834 | /* decrease mem now to avoid the memleak warning */ |
1824 | iml->sflist = NULL; | 1835 | atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); |
1836 | call_rcu(&psf->rcu, ip_sf_socklist_reclaim); | ||
1825 | return err; | 1837 | return err; |
1826 | } | 1838 | } |
1827 | 1839 | ||
1840 | |||
1841 | static void ip_mc_socklist_reclaim(struct rcu_head *rp) | ||
1842 | { | ||
1843 | struct ip_mc_socklist *iml; | ||
1844 | |||
1845 | iml = container_of(rp, struct ip_mc_socklist, rcu); | ||
1846 | /* sk_omem_alloc should have been decreased by the caller*/ | ||
1847 | kfree(iml); | ||
1848 | } | ||
1849 | |||
1850 | |||
1828 | /* | 1851 | /* |
1829 | * Ask a socket to leave a group. | 1852 | * Ask a socket to leave a group. |
1830 | */ | 1853 | */ |
@@ -1854,12 +1877,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) | |||
1854 | 1877 | ||
1855 | (void) ip_mc_leave_src(sk, iml, in_dev); | 1878 | (void) ip_mc_leave_src(sk, iml, in_dev); |
1856 | 1879 | ||
1857 | *imlp = iml->next; | 1880 | rcu_assign_pointer(*imlp, iml->next); |
1858 | 1881 | ||
1859 | if (in_dev) | 1882 | if (in_dev) |
1860 | ip_mc_dec_group(in_dev, group); | 1883 | ip_mc_dec_group(in_dev, group); |
1861 | rtnl_unlock(); | 1884 | rtnl_unlock(); |
1862 | sock_kfree_s(sk, iml, sizeof(*iml)); | 1885 | /* decrease mem now to avoid the memleak warning */ |
1886 | atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); | ||
1887 | call_rcu(&iml->rcu, ip_mc_socklist_reclaim); | ||
1863 | return 0; | 1888 | return 0; |
1864 | } | 1889 | } |
1865 | if (!in_dev) | 1890 | if (!in_dev) |
@@ -1899,8 +1924,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct | |||
1899 | err = -EADDRNOTAVAIL; | 1924 | err = -EADDRNOTAVAIL; |
1900 | 1925 | ||
1901 | for (pmc=inet->mc_list; pmc; pmc=pmc->next) { | 1926 | for (pmc=inet->mc_list; pmc; pmc=pmc->next) { |
1902 | if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr | 1927 | if ((pmc->multi.imr_multiaddr.s_addr == |
1903 | && pmc->multi.imr_ifindex == imr.imr_ifindex) | 1928 | imr.imr_multiaddr.s_addr) && |
1929 | (pmc->multi.imr_ifindex == imr.imr_ifindex)) | ||
1904 | break; | 1930 | break; |
1905 | } | 1931 | } |
1906 | if (!pmc) { /* must have a prior join */ | 1932 | if (!pmc) { /* must have a prior join */ |
@@ -1973,9 +1999,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct | |||
1973 | if (psl) { | 1999 | if (psl) { |
1974 | for (i=0; i<psl->sl_count; i++) | 2000 | for (i=0; i<psl->sl_count; i++) |
1975 | newpsl->sl_addr[i] = psl->sl_addr[i]; | 2001 | newpsl->sl_addr[i] = psl->sl_addr[i]; |
1976 | sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); | 2002 | /* decrease mem now to avoid the memleak warning */ |
2003 | atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); | ||
2004 | call_rcu(&psl->rcu, ip_sf_socklist_reclaim); | ||
1977 | } | 2005 | } |
1978 | pmc->sflist = psl = newpsl; | 2006 | rcu_assign_pointer(pmc->sflist, newpsl); |
2007 | psl = newpsl; | ||
1979 | } | 2008 | } |
1980 | rv = 1; /* > 0 for insert logic below if sl_count is 0 */ | 2009 | rv = 1; /* > 0 for insert logic below if sl_count is 0 */ |
1981 | for (i=0; i<psl->sl_count; i++) { | 2010 | for (i=0; i<psl->sl_count; i++) { |
@@ -2071,11 +2100,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) | |||
2071 | if (psl) { | 2100 | if (psl) { |
2072 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, | 2101 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, |
2073 | psl->sl_count, psl->sl_addr, 0); | 2102 | psl->sl_count, psl->sl_addr, 0); |
2074 | sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); | 2103 | /* decrease mem now to avoid the memleak warning */ |
2104 | atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); | ||
2105 | call_rcu(&psl->rcu, ip_sf_socklist_reclaim); | ||
2075 | } else | 2106 | } else |
2076 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, | 2107 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, |
2077 | 0, NULL, 0); | 2108 | 0, NULL, 0); |
2078 | pmc->sflist = newpsl; | 2109 | rcu_assign_pointer(pmc->sflist, newpsl); |
2079 | pmc->sfmode = msf->imsf_fmode; | 2110 | pmc->sfmode = msf->imsf_fmode; |
2080 | err = 0; | 2111 | err = 0; |
2081 | done: | 2112 | done: |
@@ -2208,30 +2239,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) | |||
2208 | struct ip_mc_socklist *pmc; | 2239 | struct ip_mc_socklist *pmc; |
2209 | struct ip_sf_socklist *psl; | 2240 | struct ip_sf_socklist *psl; |
2210 | int i; | 2241 | int i; |
2242 | int ret; | ||
2211 | 2243 | ||
2244 | ret = 1; | ||
2212 | if (!ipv4_is_multicast(loc_addr)) | 2245 | if (!ipv4_is_multicast(loc_addr)) |
2213 | return 1; | 2246 | goto out; |
2214 | 2247 | ||
2215 | for (pmc=inet->mc_list; pmc; pmc=pmc->next) { | 2248 | rcu_read_lock(); |
2249 | for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { | ||
2216 | if (pmc->multi.imr_multiaddr.s_addr == loc_addr && | 2250 | if (pmc->multi.imr_multiaddr.s_addr == loc_addr && |
2217 | pmc->multi.imr_ifindex == dif) | 2251 | pmc->multi.imr_ifindex == dif) |
2218 | break; | 2252 | break; |
2219 | } | 2253 | } |
2254 | ret = inet->mc_all; | ||
2220 | if (!pmc) | 2255 | if (!pmc) |
2221 | return inet->mc_all; | 2256 | goto unlock; |
2222 | psl = pmc->sflist; | 2257 | psl = pmc->sflist; |
2258 | ret = (pmc->sfmode == MCAST_EXCLUDE); | ||
2223 | if (!psl) | 2259 | if (!psl) |
2224 | return pmc->sfmode == MCAST_EXCLUDE; | 2260 | goto unlock; |
2225 | 2261 | ||
2226 | for (i=0; i<psl->sl_count; i++) { | 2262 | for (i=0; i<psl->sl_count; i++) { |
2227 | if (psl->sl_addr[i] == rmt_addr) | 2263 | if (psl->sl_addr[i] == rmt_addr) |
2228 | break; | 2264 | break; |
2229 | } | 2265 | } |
2266 | ret = 0; | ||
2230 | if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) | 2267 | if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) |
2231 | return 0; | 2268 | goto unlock; |
2232 | if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) | 2269 | if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) |
2233 | return 0; | 2270 | goto unlock; |
2234 | return 1; | 2271 | ret = 1; |
2272 | unlock: | ||
2273 | rcu_read_unlock(); | ||
2274 | out: | ||
2275 | return ret; | ||
2235 | } | 2276 | } |
2236 | 2277 | ||
2237 | /* | 2278 | /* |
@@ -2250,7 +2291,7 @@ void ip_mc_drop_socket(struct sock *sk) | |||
2250 | rtnl_lock(); | 2291 | rtnl_lock(); |
2251 | while ((iml = inet->mc_list) != NULL) { | 2292 | while ((iml = inet->mc_list) != NULL) { |
2252 | struct in_device *in_dev; | 2293 | struct in_device *in_dev; |
2253 | inet->mc_list = iml->next; | 2294 | rcu_assign_pointer(inet->mc_list, iml->next); |
2254 | 2295 | ||
2255 | in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); | 2296 | in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); |
2256 | (void) ip_mc_leave_src(sk, iml, in_dev); | 2297 | (void) ip_mc_leave_src(sk, iml, in_dev); |
@@ -2258,7 +2299,9 @@ void ip_mc_drop_socket(struct sock *sk) | |||
2258 | ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); | 2299 | ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); |
2259 | in_dev_put(in_dev); | 2300 | in_dev_put(in_dev); |
2260 | } | 2301 | } |
2261 | sock_kfree_s(sk, iml, sizeof(*iml)); | 2302 | /* decrease mem now to avoid the memleak warning */ |
2303 | atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); | ||
2304 | call_rcu(&iml->rcu, ip_mc_socklist_reclaim); | ||
2262 | } | 2305 | } |
2263 | rtnl_unlock(); | 2306 | rtnl_unlock(); |
2264 | } | 2307 | } |
@@ -2311,9 +2354,10 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) | |||
2311 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); | 2354 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); |
2312 | 2355 | ||
2313 | state->in_dev = NULL; | 2356 | state->in_dev = NULL; |
2314 | for_each_netdev(net, state->dev) { | 2357 | for_each_netdev_rcu(net, state->dev) { |
2315 | struct in_device *in_dev; | 2358 | struct in_device *in_dev; |
2316 | in_dev = in_dev_get(state->dev); | 2359 | |
2360 | in_dev = __in_dev_get_rcu(state->dev); | ||
2317 | if (!in_dev) | 2361 | if (!in_dev) |
2318 | continue; | 2362 | continue; |
2319 | read_lock(&in_dev->mc_list_lock); | 2363 | read_lock(&in_dev->mc_list_lock); |
@@ -2323,7 +2367,6 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) | |||
2323 | break; | 2367 | break; |
2324 | } | 2368 | } |
2325 | read_unlock(&in_dev->mc_list_lock); | 2369 | read_unlock(&in_dev->mc_list_lock); |
2326 | in_dev_put(in_dev); | ||
2327 | } | 2370 | } |
2328 | return im; | 2371 | return im; |
2329 | } | 2372 | } |
@@ -2333,16 +2376,15 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li | |||
2333 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); | 2376 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); |
2334 | im = im->next; | 2377 | im = im->next; |
2335 | while (!im) { | 2378 | while (!im) { |
2336 | if (likely(state->in_dev != NULL)) { | 2379 | if (likely(state->in_dev != NULL)) |
2337 | read_unlock(&state->in_dev->mc_list_lock); | 2380 | read_unlock(&state->in_dev->mc_list_lock); |
2338 | in_dev_put(state->in_dev); | 2381 | |
2339 | } | 2382 | state->dev = next_net_device_rcu(state->dev); |
2340 | state->dev = next_net_device(state->dev); | ||
2341 | if (!state->dev) { | 2383 | if (!state->dev) { |
2342 | state->in_dev = NULL; | 2384 | state->in_dev = NULL; |
2343 | break; | 2385 | break; |
2344 | } | 2386 | } |
2345 | state->in_dev = in_dev_get(state->dev); | 2387 | state->in_dev = __in_dev_get_rcu(state->dev); |
2346 | if (!state->in_dev) | 2388 | if (!state->in_dev) |
2347 | continue; | 2389 | continue; |
2348 | read_lock(&state->in_dev->mc_list_lock); | 2390 | read_lock(&state->in_dev->mc_list_lock); |
@@ -2361,9 +2403,9 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos) | |||
2361 | } | 2403 | } |
2362 | 2404 | ||
2363 | static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) | 2405 | static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) |
2364 | __acquires(dev_base_lock) | 2406 | __acquires(rcu) |
2365 | { | 2407 | { |
2366 | read_lock(&dev_base_lock); | 2408 | rcu_read_lock(); |
2367 | return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2409 | return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
2368 | } | 2410 | } |
2369 | 2411 | ||
@@ -2379,16 +2421,15 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2379 | } | 2421 | } |
2380 | 2422 | ||
2381 | static void igmp_mc_seq_stop(struct seq_file *seq, void *v) | 2423 | static void igmp_mc_seq_stop(struct seq_file *seq, void *v) |
2382 | __releases(dev_base_lock) | 2424 | __releases(rcu) |
2383 | { | 2425 | { |
2384 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); | 2426 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); |
2385 | if (likely(state->in_dev != NULL)) { | 2427 | if (likely(state->in_dev != NULL)) { |
2386 | read_unlock(&state->in_dev->mc_list_lock); | 2428 | read_unlock(&state->in_dev->mc_list_lock); |
2387 | in_dev_put(state->in_dev); | ||
2388 | state->in_dev = NULL; | 2429 | state->in_dev = NULL; |
2389 | } | 2430 | } |
2390 | state->dev = NULL; | 2431 | state->dev = NULL; |
2391 | read_unlock(&dev_base_lock); | 2432 | rcu_read_unlock(); |
2392 | } | 2433 | } |
2393 | 2434 | ||
2394 | static int igmp_mc_seq_show(struct seq_file *seq, void *v) | 2435 | static int igmp_mc_seq_show(struct seq_file *seq, void *v) |
@@ -2462,9 +2503,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) | |||
2462 | 2503 | ||
2463 | state->idev = NULL; | 2504 | state->idev = NULL; |
2464 | state->im = NULL; | 2505 | state->im = NULL; |
2465 | for_each_netdev(net, state->dev) { | 2506 | for_each_netdev_rcu(net, state->dev) { |
2466 | struct in_device *idev; | 2507 | struct in_device *idev; |
2467 | idev = in_dev_get(state->dev); | 2508 | idev = __in_dev_get_rcu(state->dev); |
2468 | if (unlikely(idev == NULL)) | 2509 | if (unlikely(idev == NULL)) |
2469 | continue; | 2510 | continue; |
2470 | read_lock(&idev->mc_list_lock); | 2511 | read_lock(&idev->mc_list_lock); |
@@ -2480,7 +2521,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) | |||
2480 | spin_unlock_bh(&im->lock); | 2521 | spin_unlock_bh(&im->lock); |
2481 | } | 2522 | } |
2482 | read_unlock(&idev->mc_list_lock); | 2523 | read_unlock(&idev->mc_list_lock); |
2483 | in_dev_put(idev); | ||
2484 | } | 2524 | } |
2485 | return psf; | 2525 | return psf; |
2486 | } | 2526 | } |
@@ -2494,16 +2534,15 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l | |||
2494 | spin_unlock_bh(&state->im->lock); | 2534 | spin_unlock_bh(&state->im->lock); |
2495 | state->im = state->im->next; | 2535 | state->im = state->im->next; |
2496 | while (!state->im) { | 2536 | while (!state->im) { |
2497 | if (likely(state->idev != NULL)) { | 2537 | if (likely(state->idev != NULL)) |
2498 | read_unlock(&state->idev->mc_list_lock); | 2538 | read_unlock(&state->idev->mc_list_lock); |
2499 | in_dev_put(state->idev); | 2539 | |
2500 | } | 2540 | state->dev = next_net_device_rcu(state->dev); |
2501 | state->dev = next_net_device(state->dev); | ||
2502 | if (!state->dev) { | 2541 | if (!state->dev) { |
2503 | state->idev = NULL; | 2542 | state->idev = NULL; |
2504 | goto out; | 2543 | goto out; |
2505 | } | 2544 | } |
2506 | state->idev = in_dev_get(state->dev); | 2545 | state->idev = __in_dev_get_rcu(state->dev); |
2507 | if (!state->idev) | 2546 | if (!state->idev) |
2508 | continue; | 2547 | continue; |
2509 | read_lock(&state->idev->mc_list_lock); | 2548 | read_lock(&state->idev->mc_list_lock); |
@@ -2528,8 +2567,9 @@ static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos) | |||
2528 | } | 2567 | } |
2529 | 2568 | ||
2530 | static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) | 2569 | static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) |
2570 | __acquires(rcu) | ||
2531 | { | 2571 | { |
2532 | read_lock(&dev_base_lock); | 2572 | rcu_read_lock(); |
2533 | return *pos ? igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2573 | return *pos ? igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
2534 | } | 2574 | } |
2535 | 2575 | ||
@@ -2545,6 +2585,7 @@ static void *igmp_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2545 | } | 2585 | } |
2546 | 2586 | ||
2547 | static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) | 2587 | static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) |
2588 | __releases(rcu) | ||
2548 | { | 2589 | { |
2549 | struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); | 2590 | struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); |
2550 | if (likely(state->im != NULL)) { | 2591 | if (likely(state->im != NULL)) { |
@@ -2553,11 +2594,10 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) | |||
2553 | } | 2594 | } |
2554 | if (likely(state->idev != NULL)) { | 2595 | if (likely(state->idev != NULL)) { |
2555 | read_unlock(&state->idev->mc_list_lock); | 2596 | read_unlock(&state->idev->mc_list_lock); |
2556 | in_dev_put(state->idev); | ||
2557 | state->idev = NULL; | 2597 | state->idev = NULL; |
2558 | } | 2598 | } |
2559 | state->dev = NULL; | 2599 | state->dev = NULL; |
2560 | read_unlock(&dev_base_lock); | 2600 | rcu_read_unlock(); |
2561 | } | 2601 | } |
2562 | 2602 | ||
2563 | static int igmp_mcf_seq_show(struct seq_file *seq, void *v) | 2603 | static int igmp_mcf_seq_show(struct seq_file *seq, void *v) |
@@ -2605,7 +2645,7 @@ static const struct file_operations igmp_mcf_seq_fops = { | |||
2605 | .release = seq_release_net, | 2645 | .release = seq_release_net, |
2606 | }; | 2646 | }; |
2607 | 2647 | ||
2608 | static int igmp_net_init(struct net *net) | 2648 | static int __net_init igmp_net_init(struct net *net) |
2609 | { | 2649 | { |
2610 | struct proc_dir_entry *pde; | 2650 | struct proc_dir_entry *pde; |
2611 | 2651 | ||
@@ -2623,7 +2663,7 @@ out_igmp: | |||
2623 | return -ENOMEM; | 2663 | return -ENOMEM; |
2624 | } | 2664 | } |
2625 | 2665 | ||
2626 | static void igmp_net_exit(struct net *net) | 2666 | static void __net_exit igmp_net_exit(struct net *net) |
2627 | { | 2667 | { |
2628 | proc_net_remove(net, "mcfilter"); | 2668 | proc_net_remove(net, "mcfilter"); |
2629 | proc_net_remove(net, "igmp"); | 2669 | proc_net_remove(net, "igmp"); |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 537731b3bcb3..8da6429269dd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -112,7 +112,7 @@ again: | |||
112 | hashinfo->bhash_size)]; | 112 | hashinfo->bhash_size)]; |
113 | spin_lock(&head->lock); | 113 | spin_lock(&head->lock); |
114 | inet_bind_bucket_for_each(tb, node, &head->chain) | 114 | inet_bind_bucket_for_each(tb, node, &head->chain) |
115 | if (ib_net(tb) == net && tb->port == rover) { | 115 | if (net_eq(ib_net(tb), net) && tb->port == rover) { |
116 | if (tb->fastreuse > 0 && | 116 | if (tb->fastreuse > 0 && |
117 | sk->sk_reuse && | 117 | sk->sk_reuse && |
118 | sk->sk_state != TCP_LISTEN && | 118 | sk->sk_state != TCP_LISTEN && |
@@ -158,7 +158,7 @@ have_snum: | |||
158 | hashinfo->bhash_size)]; | 158 | hashinfo->bhash_size)]; |
159 | spin_lock(&head->lock); | 159 | spin_lock(&head->lock); |
160 | inet_bind_bucket_for_each(tb, node, &head->chain) | 160 | inet_bind_bucket_for_each(tb, node, &head->chain) |
161 | if (ib_net(tb) == net && tb->port == snum) | 161 | if (net_eq(ib_net(tb), net) && tb->port == snum) |
162 | goto tb_found; | 162 | goto tb_found; |
163 | } | 163 | } |
164 | tb = NULL; | 164 | tb = NULL; |
@@ -358,6 +358,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
358 | const struct inet_request_sock *ireq = inet_rsk(req); | 358 | const struct inet_request_sock *ireq = inet_rsk(req); |
359 | struct ip_options *opt = inet_rsk(req)->opt; | 359 | struct ip_options *opt = inet_rsk(req)->opt; |
360 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 360 | struct flowi fl = { .oif = sk->sk_bound_dev_if, |
361 | .mark = sk->sk_mark, | ||
361 | .nl_u = { .ip4_u = | 362 | .nl_u = { .ip4_u = |
362 | { .daddr = ((opt && opt->srr) ? | 363 | { .daddr = ((opt && opt->srr) ? |
363 | opt->faddr : | 364 | opt->faddr : |
@@ -367,7 +368,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
367 | .proto = sk->sk_protocol, | 368 | .proto = sk->sk_protocol, |
368 | .flags = inet_sk_flowi_flags(sk), | 369 | .flags = inet_sk_flowi_flags(sk), |
369 | .uli_u = { .ports = | 370 | .uli_u = { .ports = |
370 | { .sport = inet_sk(sk)->sport, | 371 | { .sport = inet_sk(sk)->inet_sport, |
371 | .dport = ireq->rmt_port } } }; | 372 | .dport = ireq->rmt_port } } }; |
372 | struct net *net = sock_net(sk); | 373 | struct net *net = sock_net(sk); |
373 | 374 | ||
@@ -528,9 +529,11 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
528 | syn_ack_recalc(req, thresh, max_retries, | 529 | syn_ack_recalc(req, thresh, max_retries, |
529 | queue->rskq_defer_accept, | 530 | queue->rskq_defer_accept, |
530 | &expire, &resend); | 531 | &expire, &resend); |
532 | if (req->rsk_ops->syn_ack_timeout) | ||
533 | req->rsk_ops->syn_ack_timeout(parent, req); | ||
531 | if (!expire && | 534 | if (!expire && |
532 | (!resend || | 535 | (!resend || |
533 | !req->rsk_ops->rtx_syn_ack(parent, req) || | 536 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || |
534 | inet_rsk(req)->acked)) { | 537 | inet_rsk(req)->acked)) { |
535 | unsigned long timeo; | 538 | unsigned long timeo; |
536 | 539 | ||
@@ -574,9 +577,9 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | |||
574 | newsk->sk_state = TCP_SYN_RECV; | 577 | newsk->sk_state = TCP_SYN_RECV; |
575 | newicsk->icsk_bind_hash = NULL; | 578 | newicsk->icsk_bind_hash = NULL; |
576 | 579 | ||
577 | inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; | 580 | inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; |
578 | inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); | 581 | inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); |
579 | inet_sk(newsk)->sport = inet_rsk(req)->loc_port; | 582 | inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; |
580 | newsk->sk_write_space = sk_stream_write_space; | 583 | newsk->sk_write_space = sk_stream_write_space; |
581 | 584 | ||
582 | newicsk->icsk_retransmits = 0; | 585 | newicsk->icsk_retransmits = 0; |
@@ -607,8 +610,8 @@ void inet_csk_destroy_sock(struct sock *sk) | |||
607 | /* It cannot be in hash table! */ | 610 | /* It cannot be in hash table! */ |
608 | WARN_ON(!sk_unhashed(sk)); | 611 | WARN_ON(!sk_unhashed(sk)); |
609 | 612 | ||
610 | /* If it has not 0 inet_sk(sk)->num, it must be bound */ | 613 | /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ |
611 | WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); | 614 | WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); |
612 | 615 | ||
613 | sk->sk_prot->destroy(sk); | 616 | sk->sk_prot->destroy(sk); |
614 | 617 | ||
@@ -643,8 +646,8 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) | |||
643 | * after validation is complete. | 646 | * after validation is complete. |
644 | */ | 647 | */ |
645 | sk->sk_state = TCP_LISTEN; | 648 | sk->sk_state = TCP_LISTEN; |
646 | if (!sk->sk_prot->get_port(sk, inet->num)) { | 649 | if (!sk->sk_prot->get_port(sk, inet->inet_num)) { |
647 | inet->sport = htons(inet->num); | 650 | inet->inet_sport = htons(inet->inet_num); |
648 | 651 | ||
649 | sk_dst_reset(sk); | 652 | sk_dst_reset(sk); |
650 | sk->sk_prot->hash(sk); | 653 | sk->sk_prot->hash(sk); |
@@ -720,8 +723,8 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) | |||
720 | const struct inet_sock *inet = inet_sk(sk); | 723 | const struct inet_sock *inet = inet_sk(sk); |
721 | 724 | ||
722 | sin->sin_family = AF_INET; | 725 | sin->sin_family = AF_INET; |
723 | sin->sin_addr.s_addr = inet->daddr; | 726 | sin->sin_addr.s_addr = inet->inet_daddr; |
724 | sin->sin_port = inet->dport; | 727 | sin->sin_port = inet->inet_dport; |
725 | } | 728 | } |
726 | 729 | ||
727 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); | 730 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a706a47f4dbb..e5fa2ddce320 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
15 | #include <linux/fcntl.h> | 15 | #include <linux/fcntl.h> |
16 | #include <linux/random.h> | 16 | #include <linux/random.h> |
17 | #include <linux/slab.h> | ||
17 | #include <linux/cache.h> | 18 | #include <linux/cache.h> |
18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
19 | #include <linux/time.h> | 20 | #include <linux/time.h> |
@@ -116,10 +117,10 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
116 | r->id.idiag_cookie[0] = (u32)(unsigned long)sk; | 117 | r->id.idiag_cookie[0] = (u32)(unsigned long)sk; |
117 | r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | 118 | r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); |
118 | 119 | ||
119 | r->id.idiag_sport = inet->sport; | 120 | r->id.idiag_sport = inet->inet_sport; |
120 | r->id.idiag_dport = inet->dport; | 121 | r->id.idiag_dport = inet->inet_dport; |
121 | r->id.idiag_src[0] = inet->rcv_saddr; | 122 | r->id.idiag_src[0] = inet->inet_rcv_saddr; |
122 | r->id.idiag_dst[0] = inet->daddr; | 123 | r->id.idiag_dst[0] = inet->inet_daddr; |
123 | 124 | ||
124 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 125 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
125 | if (r->idiag_family == AF_INET6) { | 126 | if (r->idiag_family == AF_INET6) { |
@@ -368,7 +369,7 @@ static int inet_diag_bc_run(const void *bc, int len, | |||
368 | yes = entry->sport >= op[1].no; | 369 | yes = entry->sport >= op[1].no; |
369 | break; | 370 | break; |
370 | case INET_DIAG_BC_S_LE: | 371 | case INET_DIAG_BC_S_LE: |
371 | yes = entry->dport <= op[1].no; | 372 | yes = entry->sport <= op[1].no; |
372 | break; | 373 | break; |
373 | case INET_DIAG_BC_D_GE: | 374 | case INET_DIAG_BC_D_GE: |
374 | yes = entry->dport >= op[1].no; | 375 | yes = entry->dport >= op[1].no; |
@@ -504,11 +505,11 @@ static int inet_csk_diag_dump(struct sock *sk, | |||
504 | } else | 505 | } else |
505 | #endif | 506 | #endif |
506 | { | 507 | { |
507 | entry.saddr = &inet->rcv_saddr; | 508 | entry.saddr = &inet->inet_rcv_saddr; |
508 | entry.daddr = &inet->daddr; | 509 | entry.daddr = &inet->inet_daddr; |
509 | } | 510 | } |
510 | entry.sport = inet->num; | 511 | entry.sport = inet->inet_num; |
511 | entry.dport = ntohs(inet->dport); | 512 | entry.dport = ntohs(inet->inet_dport); |
512 | entry.userlocks = sk->sk_userlocks; | 513 | entry.userlocks = sk->sk_userlocks; |
513 | 514 | ||
514 | if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) | 515 | if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) |
@@ -584,7 +585,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
584 | if (tmo < 0) | 585 | if (tmo < 0) |
585 | tmo = 0; | 586 | tmo = 0; |
586 | 587 | ||
587 | r->id.idiag_sport = inet->sport; | 588 | r->id.idiag_sport = inet->inet_sport; |
588 | r->id.idiag_dport = ireq->rmt_port; | 589 | r->id.idiag_dport = ireq->rmt_port; |
589 | r->id.idiag_src[0] = ireq->loc_addr; | 590 | r->id.idiag_src[0] = ireq->loc_addr; |
590 | r->id.idiag_dst[0] = ireq->rmt_addr; | 591 | r->id.idiag_dst[0] = ireq->rmt_addr; |
@@ -639,7 +640,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
639 | 640 | ||
640 | if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { | 641 | if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { |
641 | bc = (struct rtattr *)(r + 1); | 642 | bc = (struct rtattr *)(r + 1); |
642 | entry.sport = inet->num; | 643 | entry.sport = inet->inet_num; |
643 | entry.userlocks = sk->sk_userlocks; | 644 | entry.userlocks = sk->sk_userlocks; |
644 | } | 645 | } |
645 | 646 | ||
@@ -732,7 +733,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
732 | continue; | 733 | continue; |
733 | } | 734 | } |
734 | 735 | ||
735 | if (r->id.idiag_sport != inet->sport && | 736 | if (r->id.idiag_sport != inet->inet_sport && |
736 | r->id.idiag_sport) | 737 | r->id.idiag_sport) |
737 | goto next_listen; | 738 | goto next_listen; |
738 | 739 | ||
@@ -774,7 +775,7 @@ skip_listen_ht: | |||
774 | if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) | 775 | if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) |
775 | goto unlock; | 776 | goto unlock; |
776 | 777 | ||
777 | for (i = s_i; i < hashinfo->ehash_size; i++) { | 778 | for (i = s_i; i <= hashinfo->ehash_mask; i++) { |
778 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; | 779 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; |
779 | spinlock_t *lock = inet_ehash_lockp(hashinfo, i); | 780 | spinlock_t *lock = inet_ehash_lockp(hashinfo, i); |
780 | struct sock *sk; | 781 | struct sock *sk; |
@@ -797,10 +798,10 @@ skip_listen_ht: | |||
797 | goto next_normal; | 798 | goto next_normal; |
798 | if (!(r->idiag_states & (1 << sk->sk_state))) | 799 | if (!(r->idiag_states & (1 << sk->sk_state))) |
799 | goto next_normal; | 800 | goto next_normal; |
800 | if (r->id.idiag_sport != inet->sport && | 801 | if (r->id.idiag_sport != inet->inet_sport && |
801 | r->id.idiag_sport) | 802 | r->id.idiag_sport) |
802 | goto next_normal; | 803 | goto next_normal; |
803 | if (r->id.idiag_dport != inet->dport && | 804 | if (r->id.idiag_dport != inet->inet_dport && |
804 | r->id.idiag_dport) | 805 | r->id.idiag_dport) |
805 | goto next_normal; | 806 | goto next_normal; |
806 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { | 807 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index eaf3e2c8646a..a2ca6aed763b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/random.h> | 19 | #include <linux/random.h> |
20 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> |
21 | #include <linux/rtnetlink.h> | 21 | #include <linux/rtnetlink.h> |
22 | #include <linux/slab.h> | ||
22 | 23 | ||
23 | #include <net/inet_frag.h> | 24 | #include <net/inet_frag.h> |
24 | 25 | ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 625cc5f64c94..2b79377b468d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -64,7 +64,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | |||
64 | 64 | ||
65 | atomic_inc(&hashinfo->bsockets); | 65 | atomic_inc(&hashinfo->bsockets); |
66 | 66 | ||
67 | inet_sk(sk)->num = snum; | 67 | inet_sk(sk)->inet_num = snum; |
68 | sk_add_bind_node(sk, &tb->owners); | 68 | sk_add_bind_node(sk, &tb->owners); |
69 | tb->num_owners++; | 69 | tb->num_owners++; |
70 | inet_csk(sk)->icsk_bind_hash = tb; | 70 | inet_csk(sk)->icsk_bind_hash = tb; |
@@ -76,7 +76,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | |||
76 | static void __inet_put_port(struct sock *sk) | 76 | static void __inet_put_port(struct sock *sk) |
77 | { | 77 | { |
78 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 78 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
79 | const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, | 79 | const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, |
80 | hashinfo->bhash_size); | 80 | hashinfo->bhash_size); |
81 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; | 81 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; |
82 | struct inet_bind_bucket *tb; | 82 | struct inet_bind_bucket *tb; |
@@ -88,7 +88,7 @@ static void __inet_put_port(struct sock *sk) | |||
88 | __sk_del_bind_node(sk); | 88 | __sk_del_bind_node(sk); |
89 | tb->num_owners--; | 89 | tb->num_owners--; |
90 | inet_csk(sk)->icsk_bind_hash = NULL; | 90 | inet_csk(sk)->icsk_bind_hash = NULL; |
91 | inet_sk(sk)->num = 0; | 91 | inet_sk(sk)->inet_num = 0; |
92 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | 92 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); |
93 | spin_unlock(&head->lock); | 93 | spin_unlock(&head->lock); |
94 | } | 94 | } |
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(inet_put_port); | |||
105 | void __inet_inherit_port(struct sock *sk, struct sock *child) | 105 | void __inet_inherit_port(struct sock *sk, struct sock *child) |
106 | { | 106 | { |
107 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; | 107 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; |
108 | const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, | 108 | const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, |
109 | table->bhash_size); | 109 | table->bhash_size); |
110 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; | 110 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; |
111 | struct inet_bind_bucket *tb; | 111 | struct inet_bind_bucket *tb; |
@@ -126,9 +126,9 @@ static inline int compute_score(struct sock *sk, struct net *net, | |||
126 | int score = -1; | 126 | int score = -1; |
127 | struct inet_sock *inet = inet_sk(sk); | 127 | struct inet_sock *inet = inet_sk(sk); |
128 | 128 | ||
129 | if (net_eq(sock_net(sk), net) && inet->num == hnum && | 129 | if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && |
130 | !ipv6_only_sock(sk)) { | 130 | !ipv6_only_sock(sk)) { |
131 | __be32 rcv_saddr = inet->rcv_saddr; | 131 | __be32 rcv_saddr = inet->inet_rcv_saddr; |
132 | score = sk->sk_family == PF_INET ? 1 : 0; | 132 | score = sk->sk_family == PF_INET ? 1 : 0; |
133 | if (rcv_saddr) { | 133 | if (rcv_saddr) { |
134 | if (rcv_saddr != daddr) | 134 | if (rcv_saddr != daddr) |
@@ -209,7 +209,7 @@ struct sock * __inet_lookup_established(struct net *net, | |||
209 | * have wildcards anyways. | 209 | * have wildcards anyways. |
210 | */ | 210 | */ |
211 | unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); | 211 | unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); |
212 | unsigned int slot = hash & (hashinfo->ehash_size - 1); | 212 | unsigned int slot = hash & hashinfo->ehash_mask; |
213 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; | 213 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
214 | 214 | ||
215 | rcu_read_lock(); | 215 | rcu_read_lock(); |
@@ -273,18 +273,20 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
273 | { | 273 | { |
274 | struct inet_hashinfo *hinfo = death_row->hashinfo; | 274 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
275 | struct inet_sock *inet = inet_sk(sk); | 275 | struct inet_sock *inet = inet_sk(sk); |
276 | __be32 daddr = inet->rcv_saddr; | 276 | __be32 daddr = inet->inet_rcv_saddr; |
277 | __be32 saddr = inet->daddr; | 277 | __be32 saddr = inet->inet_daddr; |
278 | int dif = sk->sk_bound_dev_if; | 278 | int dif = sk->sk_bound_dev_if; |
279 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 279 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
280 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); | 280 | const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); |
281 | struct net *net = sock_net(sk); | 281 | struct net *net = sock_net(sk); |
282 | unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); | 282 | unsigned int hash = inet_ehashfn(net, daddr, lport, |
283 | saddr, inet->inet_dport); | ||
283 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 284 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
284 | spinlock_t *lock = inet_ehash_lockp(hinfo, hash); | 285 | spinlock_t *lock = inet_ehash_lockp(hinfo, hash); |
285 | struct sock *sk2; | 286 | struct sock *sk2; |
286 | const struct hlist_nulls_node *node; | 287 | const struct hlist_nulls_node *node; |
287 | struct inet_timewait_sock *tw; | 288 | struct inet_timewait_sock *tw; |
289 | int twrefcnt = 0; | ||
288 | 290 | ||
289 | spin_lock(lock); | 291 | spin_lock(lock); |
290 | 292 | ||
@@ -312,25 +314,28 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
312 | unique: | 314 | unique: |
313 | /* Must record num and sport now. Otherwise we will see | 315 | /* Must record num and sport now. Otherwise we will see |
314 | * in hash table socket with a funny identity. */ | 316 | * in hash table socket with a funny identity. */ |
315 | inet->num = lport; | 317 | inet->inet_num = lport; |
316 | inet->sport = htons(lport); | 318 | inet->inet_sport = htons(lport); |
317 | sk->sk_hash = hash; | 319 | sk->sk_hash = hash; |
318 | WARN_ON(!sk_unhashed(sk)); | 320 | WARN_ON(!sk_unhashed(sk)); |
319 | __sk_nulls_add_node_rcu(sk, &head->chain); | 321 | __sk_nulls_add_node_rcu(sk, &head->chain); |
322 | if (tw) { | ||
323 | twrefcnt = inet_twsk_unhash(tw); | ||
324 | NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); | ||
325 | } | ||
320 | spin_unlock(lock); | 326 | spin_unlock(lock); |
327 | if (twrefcnt) | ||
328 | inet_twsk_put(tw); | ||
321 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 329 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
322 | 330 | ||
323 | if (twp) { | 331 | if (twp) { |
324 | *twp = tw; | 332 | *twp = tw; |
325 | NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); | ||
326 | } else if (tw) { | 333 | } else if (tw) { |
327 | /* Silly. Should hash-dance instead... */ | 334 | /* Silly. Should hash-dance instead... */ |
328 | inet_twsk_deschedule(tw, death_row); | 335 | inet_twsk_deschedule(tw, death_row); |
329 | NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); | ||
330 | 336 | ||
331 | inet_twsk_put(tw); | 337 | inet_twsk_put(tw); |
332 | } | 338 | } |
333 | |||
334 | return 0; | 339 | return 0; |
335 | 340 | ||
336 | not_unique: | 341 | not_unique: |
@@ -341,16 +346,18 @@ not_unique: | |||
341 | static inline u32 inet_sk_port_offset(const struct sock *sk) | 346 | static inline u32 inet_sk_port_offset(const struct sock *sk) |
342 | { | 347 | { |
343 | const struct inet_sock *inet = inet_sk(sk); | 348 | const struct inet_sock *inet = inet_sk(sk); |
344 | return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, | 349 | return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, |
345 | inet->dport); | 350 | inet->inet_daddr, |
351 | inet->inet_dport); | ||
346 | } | 352 | } |
347 | 353 | ||
348 | void __inet_hash_nolisten(struct sock *sk) | 354 | int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) |
349 | { | 355 | { |
350 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 356 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
351 | struct hlist_nulls_head *list; | 357 | struct hlist_nulls_head *list; |
352 | spinlock_t *lock; | 358 | spinlock_t *lock; |
353 | struct inet_ehash_bucket *head; | 359 | struct inet_ehash_bucket *head; |
360 | int twrefcnt = 0; | ||
354 | 361 | ||
355 | WARN_ON(!sk_unhashed(sk)); | 362 | WARN_ON(!sk_unhashed(sk)); |
356 | 363 | ||
@@ -361,8 +368,13 @@ void __inet_hash_nolisten(struct sock *sk) | |||
361 | 368 | ||
362 | spin_lock(lock); | 369 | spin_lock(lock); |
363 | __sk_nulls_add_node_rcu(sk, list); | 370 | __sk_nulls_add_node_rcu(sk, list); |
371 | if (tw) { | ||
372 | WARN_ON(sk->sk_hash != tw->tw_hash); | ||
373 | twrefcnt = inet_twsk_unhash(tw); | ||
374 | } | ||
364 | spin_unlock(lock); | 375 | spin_unlock(lock); |
365 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 376 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
377 | return twrefcnt; | ||
366 | } | 378 | } |
367 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | 379 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); |
368 | 380 | ||
@@ -372,7 +384,7 @@ static void __inet_hash(struct sock *sk) | |||
372 | struct inet_listen_hashbucket *ilb; | 384 | struct inet_listen_hashbucket *ilb; |
373 | 385 | ||
374 | if (sk->sk_state != TCP_LISTEN) { | 386 | if (sk->sk_state != TCP_LISTEN) { |
375 | __inet_hash_nolisten(sk); | 387 | __inet_hash_nolisten(sk, NULL); |
376 | return; | 388 | return; |
377 | } | 389 | } |
378 | 390 | ||
@@ -421,14 +433,15 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
421 | struct sock *sk, u32 port_offset, | 433 | struct sock *sk, u32 port_offset, |
422 | int (*check_established)(struct inet_timewait_death_row *, | 434 | int (*check_established)(struct inet_timewait_death_row *, |
423 | struct sock *, __u16, struct inet_timewait_sock **), | 435 | struct sock *, __u16, struct inet_timewait_sock **), |
424 | void (*hash)(struct sock *sk)) | 436 | int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) |
425 | { | 437 | { |
426 | struct inet_hashinfo *hinfo = death_row->hashinfo; | 438 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
427 | const unsigned short snum = inet_sk(sk)->num; | 439 | const unsigned short snum = inet_sk(sk)->inet_num; |
428 | struct inet_bind_hashbucket *head; | 440 | struct inet_bind_hashbucket *head; |
429 | struct inet_bind_bucket *tb; | 441 | struct inet_bind_bucket *tb; |
430 | int ret; | 442 | int ret; |
431 | struct net *net = sock_net(sk); | 443 | struct net *net = sock_net(sk); |
444 | int twrefcnt = 1; | ||
432 | 445 | ||
433 | if (!snum) { | 446 | if (!snum) { |
434 | int i, remaining, low, high, port; | 447 | int i, remaining, low, high, port; |
@@ -452,7 +465,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
452 | * unique enough. | 465 | * unique enough. |
453 | */ | 466 | */ |
454 | inet_bind_bucket_for_each(tb, node, &head->chain) { | 467 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
455 | if (ib_net(tb) == net && tb->port == port) { | 468 | if (net_eq(ib_net(tb), net) && |
469 | tb->port == port) { | ||
456 | if (tb->fastreuse >= 0) | 470 | if (tb->fastreuse >= 0) |
457 | goto next_port; | 471 | goto next_port; |
458 | WARN_ON(hlist_empty(&tb->owners)); | 472 | WARN_ON(hlist_empty(&tb->owners)); |
@@ -485,14 +499,19 @@ ok: | |||
485 | /* Head lock still held and bh's disabled */ | 499 | /* Head lock still held and bh's disabled */ |
486 | inet_bind_hash(sk, tb, port); | 500 | inet_bind_hash(sk, tb, port); |
487 | if (sk_unhashed(sk)) { | 501 | if (sk_unhashed(sk)) { |
488 | inet_sk(sk)->sport = htons(port); | 502 | inet_sk(sk)->inet_sport = htons(port); |
489 | hash(sk); | 503 | twrefcnt += hash(sk, tw); |
490 | } | 504 | } |
505 | if (tw) | ||
506 | twrefcnt += inet_twsk_bind_unhash(tw, hinfo); | ||
491 | spin_unlock(&head->lock); | 507 | spin_unlock(&head->lock); |
492 | 508 | ||
493 | if (tw) { | 509 | if (tw) { |
494 | inet_twsk_deschedule(tw, death_row); | 510 | inet_twsk_deschedule(tw, death_row); |
495 | inet_twsk_put(tw); | 511 | while (twrefcnt) { |
512 | twrefcnt--; | ||
513 | inet_twsk_put(tw); | ||
514 | } | ||
496 | } | 515 | } |
497 | 516 | ||
498 | ret = 0; | 517 | ret = 0; |
@@ -503,7 +522,7 @@ ok: | |||
503 | tb = inet_csk(sk)->icsk_bind_hash; | 522 | tb = inet_csk(sk)->icsk_bind_hash; |
504 | spin_lock_bh(&head->lock); | 523 | spin_lock_bh(&head->lock); |
505 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 524 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
506 | hash(sk); | 525 | hash(sk, NULL); |
507 | spin_unlock_bh(&head->lock); | 526 | spin_unlock_bh(&head->lock); |
508 | return 0; | 527 | return 0; |
509 | } else { | 528 | } else { |
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 6a667dae315e..47038cb6c138 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c | |||
@@ -64,15 +64,15 @@ static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, | |||
64 | if (iph->ihl != IPH_LEN_WO_OPTIONS) | 64 | if (iph->ihl != IPH_LEN_WO_OPTIONS) |
65 | return -1; | 65 | return -1; |
66 | 66 | ||
67 | if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack | 67 | if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || |
68 | || tcph->rst || tcph->syn || tcph->fin) | 68 | tcph->rst || tcph->syn || tcph->fin) |
69 | return -1; | 69 | return -1; |
70 | 70 | ||
71 | if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) | 71 | if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) |
72 | return -1; | 72 | return -1; |
73 | 73 | ||
74 | if (tcph->doff != TCPH_LEN_WO_OPTIONS | 74 | if (tcph->doff != TCPH_LEN_WO_OPTIONS && |
75 | && tcph->doff != TCPH_LEN_W_TIMESTAMP) | 75 | tcph->doff != TCPH_LEN_W_TIMESTAMP) |
76 | return -1; | 76 | return -1; |
77 | 77 | ||
78 | /* check tcp options (only timestamp allowed) */ | 78 | /* check tcp options (only timestamp allowed) */ |
@@ -262,10 +262,10 @@ static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, | |||
262 | struct iphdr *iph, | 262 | struct iphdr *iph, |
263 | struct tcphdr *tcph) | 263 | struct tcphdr *tcph) |
264 | { | 264 | { |
265 | if ((lro_desc->iph->saddr != iph->saddr) | 265 | if ((lro_desc->iph->saddr != iph->saddr) || |
266 | || (lro_desc->iph->daddr != iph->daddr) | 266 | (lro_desc->iph->daddr != iph->daddr) || |
267 | || (lro_desc->tcph->source != tcph->source) | 267 | (lro_desc->tcph->source != tcph->source) || |
268 | || (lro_desc->tcph->dest != tcph->dest)) | 268 | (lro_desc->tcph->dest != tcph->dest)) |
269 | return -1; | 269 | return -1; |
270 | return 0; | 270 | return 0; |
271 | } | 271 | } |
@@ -339,9 +339,9 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, | |||
339 | u64 flags; | 339 | u64 flags; |
340 | int vlan_hdr_len = 0; | 340 | int vlan_hdr_len = 0; |
341 | 341 | ||
342 | if (!lro_mgr->get_skb_header | 342 | if (!lro_mgr->get_skb_header || |
343 | || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, | 343 | lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, |
344 | &flags, priv)) | 344 | &flags, priv)) |
345 | goto out; | 345 | goto out; |
346 | 346 | ||
347 | if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) | 347 | if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) |
@@ -351,8 +351,8 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, | |||
351 | if (!lro_desc) | 351 | if (!lro_desc) |
352 | goto out; | 352 | goto out; |
353 | 353 | ||
354 | if ((skb->protocol == htons(ETH_P_8021Q)) | 354 | if ((skb->protocol == htons(ETH_P_8021Q)) && |
355 | && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) | 355 | !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) |
356 | vlan_hdr_len = VLAN_HLEN; | 356 | vlan_hdr_len = VLAN_HLEN; |
357 | 357 | ||
358 | if (!lro_desc->active) { /* start new lro session */ | 358 | if (!lro_desc->active) { /* start new lro session */ |
@@ -446,9 +446,9 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, | |||
446 | int hdr_len = LRO_MAX_PG_HLEN; | 446 | int hdr_len = LRO_MAX_PG_HLEN; |
447 | int vlan_hdr_len = 0; | 447 | int vlan_hdr_len = 0; |
448 | 448 | ||
449 | if (!lro_mgr->get_frag_header | 449 | if (!lro_mgr->get_frag_header || |
450 | || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, | 450 | lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, |
451 | (void *)&tcph, &flags, priv)) { | 451 | (void *)&tcph, &flags, priv)) { |
452 | mac_hdr = page_address(frags->page) + frags->page_offset; | 452 | mac_hdr = page_address(frags->page) + frags->page_offset; |
453 | goto out1; | 453 | goto out1; |
454 | } | 454 | } |
@@ -472,8 +472,8 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, | |||
472 | if (!skb) | 472 | if (!skb) |
473 | goto out; | 473 | goto out; |
474 | 474 | ||
475 | if ((skb->protocol == htons(ETH_P_8021Q)) | 475 | if ((skb->protocol == htons(ETH_P_8021Q)) && |
476 | && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) | 476 | !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) |
477 | vlan_hdr_len = VLAN_HLEN; | 477 | vlan_hdr_len = VLAN_HLEN; |
478 | 478 | ||
479 | iph = (void *)(skb->data + vlan_hdr_len); | 479 | iph = (void *)(skb->data + vlan_hdr_len); |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 13f0781f35cd..c5af909cf701 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -10,44 +10,92 @@ | |||
10 | 10 | ||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/kmemcheck.h> | 12 | #include <linux/kmemcheck.h> |
13 | #include <linux/slab.h> | ||
13 | #include <net/inet_hashtables.h> | 14 | #include <net/inet_hashtables.h> |
14 | #include <net/inet_timewait_sock.h> | 15 | #include <net/inet_timewait_sock.h> |
15 | #include <net/ip.h> | 16 | #include <net/ip.h> |
16 | 17 | ||
18 | |||
19 | /** | ||
20 | * inet_twsk_unhash - unhash a timewait socket from established hash | ||
21 | * @tw: timewait socket | ||
22 | * | ||
23 | * unhash a timewait socket from established hash, if hashed. | ||
24 | * ehash lock must be held by caller. | ||
25 | * Returns 1 if caller should call inet_twsk_put() after lock release. | ||
26 | */ | ||
27 | int inet_twsk_unhash(struct inet_timewait_sock *tw) | ||
28 | { | ||
29 | if (hlist_nulls_unhashed(&tw->tw_node)) | ||
30 | return 0; | ||
31 | |||
32 | hlist_nulls_del_rcu(&tw->tw_node); | ||
33 | sk_nulls_node_init(&tw->tw_node); | ||
34 | /* | ||
35 | * We cannot call inet_twsk_put() ourself under lock, | ||
36 | * caller must call it for us. | ||
37 | */ | ||
38 | return 1; | ||
39 | } | ||
40 | |||
41 | /** | ||
42 | * inet_twsk_bind_unhash - unhash a timewait socket from bind hash | ||
43 | * @tw: timewait socket | ||
44 | * @hashinfo: hashinfo pointer | ||
45 | * | ||
46 | * unhash a timewait socket from bind hash, if hashed. | ||
47 | * bind hash lock must be held by caller. | ||
48 | * Returns 1 if caller should call inet_twsk_put() after lock release. | ||
49 | */ | ||
50 | int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, | ||
51 | struct inet_hashinfo *hashinfo) | ||
52 | { | ||
53 | struct inet_bind_bucket *tb = tw->tw_tb; | ||
54 | |||
55 | if (!tb) | ||
56 | return 0; | ||
57 | |||
58 | __hlist_del(&tw->tw_bind_node); | ||
59 | tw->tw_tb = NULL; | ||
60 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | ||
61 | /* | ||
62 | * We cannot call inet_twsk_put() ourself under lock, | ||
63 | * caller must call it for us. | ||
64 | */ | ||
65 | return 1; | ||
66 | } | ||
67 | |||
17 | /* Must be called with locally disabled BHs. */ | 68 | /* Must be called with locally disabled BHs. */ |
18 | static void __inet_twsk_kill(struct inet_timewait_sock *tw, | 69 | static void __inet_twsk_kill(struct inet_timewait_sock *tw, |
19 | struct inet_hashinfo *hashinfo) | 70 | struct inet_hashinfo *hashinfo) |
20 | { | 71 | { |
21 | struct inet_bind_hashbucket *bhead; | 72 | struct inet_bind_hashbucket *bhead; |
22 | struct inet_bind_bucket *tb; | 73 | int refcnt; |
23 | /* Unlink from established hashes. */ | 74 | /* Unlink from established hashes. */ |
24 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); | 75 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); |
25 | 76 | ||
26 | spin_lock(lock); | 77 | spin_lock(lock); |
27 | if (hlist_nulls_unhashed(&tw->tw_node)) { | 78 | refcnt = inet_twsk_unhash(tw); |
28 | spin_unlock(lock); | ||
29 | return; | ||
30 | } | ||
31 | hlist_nulls_del_rcu(&tw->tw_node); | ||
32 | sk_nulls_node_init(&tw->tw_node); | ||
33 | spin_unlock(lock); | 79 | spin_unlock(lock); |
34 | 80 | ||
35 | /* Disassociate with bind bucket. */ | 81 | /* Disassociate with bind bucket. */ |
36 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, | 82 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, |
37 | hashinfo->bhash_size)]; | 83 | hashinfo->bhash_size)]; |
84 | |||
38 | spin_lock(&bhead->lock); | 85 | spin_lock(&bhead->lock); |
39 | tb = tw->tw_tb; | 86 | refcnt += inet_twsk_bind_unhash(tw, hashinfo); |
40 | __hlist_del(&tw->tw_bind_node); | ||
41 | tw->tw_tb = NULL; | ||
42 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | ||
43 | spin_unlock(&bhead->lock); | 87 | spin_unlock(&bhead->lock); |
88 | |||
44 | #ifdef SOCK_REFCNT_DEBUG | 89 | #ifdef SOCK_REFCNT_DEBUG |
45 | if (atomic_read(&tw->tw_refcnt) != 1) { | 90 | if (atomic_read(&tw->tw_refcnt) != 1) { |
46 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", | 91 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", |
47 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); | 92 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); |
48 | } | 93 | } |
49 | #endif | 94 | #endif |
50 | inet_twsk_put(tw); | 95 | while (refcnt) { |
96 | inet_twsk_put(tw); | ||
97 | refcnt--; | ||
98 | } | ||
51 | } | 99 | } |
52 | 100 | ||
53 | static noinline void inet_twsk_free(struct inet_timewait_sock *tw) | 101 | static noinline void inet_twsk_free(struct inet_timewait_sock *tw) |
@@ -86,7 +134,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
86 | Note, that any socket with inet->num != 0 MUST be bound in | 134 | Note, that any socket with inet->num != 0 MUST be bound in |
87 | binding cache, even if it is closed. | 135 | binding cache, even if it is closed. |
88 | */ | 136 | */ |
89 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, | 137 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, |
90 | hashinfo->bhash_size)]; | 138 | hashinfo->bhash_size)]; |
91 | spin_lock(&bhead->lock); | 139 | spin_lock(&bhead->lock); |
92 | tw->tw_tb = icsk->icsk_bind_hash; | 140 | tw->tw_tb = icsk->icsk_bind_hash; |
@@ -101,16 +149,24 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
101 | * Should be done before removing sk from established chain | 149 | * Should be done before removing sk from established chain |
102 | * because readers are lockless and search established first. | 150 | * because readers are lockless and search established first. |
103 | */ | 151 | */ |
104 | atomic_inc(&tw->tw_refcnt); | ||
105 | inet_twsk_add_node_rcu(tw, &ehead->twchain); | 152 | inet_twsk_add_node_rcu(tw, &ehead->twchain); |
106 | 153 | ||
107 | /* Step 3: Remove SK from established hash. */ | 154 | /* Step 3: Remove SK from established hash. */ |
108 | if (__sk_nulls_del_node_init_rcu(sk)) | 155 | if (__sk_nulls_del_node_init_rcu(sk)) |
109 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 156 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
110 | 157 | ||
158 | /* | ||
159 | * Notes : | ||
160 | * - We initially set tw_refcnt to 0 in inet_twsk_alloc() | ||
161 | * - We add one reference for the bhash link | ||
162 | * - We add one reference for the ehash link | ||
163 | * - We want this refcnt update done before allowing other | ||
164 | * threads to find this tw in ehash chain. | ||
165 | */ | ||
166 | atomic_add(1 + 1 + 1, &tw->tw_refcnt); | ||
167 | |||
111 | spin_unlock(lock); | 168 | spin_unlock(lock); |
112 | } | 169 | } |
113 | |||
114 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | 170 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); |
115 | 171 | ||
116 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) | 172 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) |
@@ -124,14 +180,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
124 | kmemcheck_annotate_bitfield(tw, flags); | 180 | kmemcheck_annotate_bitfield(tw, flags); |
125 | 181 | ||
126 | /* Give us an identity. */ | 182 | /* Give us an identity. */ |
127 | tw->tw_daddr = inet->daddr; | 183 | tw->tw_daddr = inet->inet_daddr; |
128 | tw->tw_rcv_saddr = inet->rcv_saddr; | 184 | tw->tw_rcv_saddr = inet->inet_rcv_saddr; |
129 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; | 185 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; |
130 | tw->tw_num = inet->num; | 186 | tw->tw_num = inet->inet_num; |
131 | tw->tw_state = TCP_TIME_WAIT; | 187 | tw->tw_state = TCP_TIME_WAIT; |
132 | tw->tw_substate = state; | 188 | tw->tw_substate = state; |
133 | tw->tw_sport = inet->sport; | 189 | tw->tw_sport = inet->inet_sport; |
134 | tw->tw_dport = inet->dport; | 190 | tw->tw_dport = inet->inet_dport; |
135 | tw->tw_family = sk->sk_family; | 191 | tw->tw_family = sk->sk_family; |
136 | tw->tw_reuse = sk->sk_reuse; | 192 | tw->tw_reuse = sk->sk_reuse; |
137 | tw->tw_hash = sk->sk_hash; | 193 | tw->tw_hash = sk->sk_hash; |
@@ -139,14 +195,18 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
139 | tw->tw_transparent = inet->transparent; | 195 | tw->tw_transparent = inet->transparent; |
140 | tw->tw_prot = sk->sk_prot_creator; | 196 | tw->tw_prot = sk->sk_prot_creator; |
141 | twsk_net_set(tw, hold_net(sock_net(sk))); | 197 | twsk_net_set(tw, hold_net(sock_net(sk))); |
142 | atomic_set(&tw->tw_refcnt, 1); | 198 | /* |
199 | * Because we use RCU lookups, we should not set tw_refcnt | ||
200 | * to a non null value before everything is setup for this | ||
201 | * timewait socket. | ||
202 | */ | ||
203 | atomic_set(&tw->tw_refcnt, 0); | ||
143 | inet_twsk_dead_node_init(tw); | 204 | inet_twsk_dead_node_init(tw); |
144 | __module_get(tw->tw_prot->owner); | 205 | __module_get(tw->tw_prot->owner); |
145 | } | 206 | } |
146 | 207 | ||
147 | return tw; | 208 | return tw; |
148 | } | 209 | } |
149 | |||
150 | EXPORT_SYMBOL_GPL(inet_twsk_alloc); | 210 | EXPORT_SYMBOL_GPL(inet_twsk_alloc); |
151 | 211 | ||
152 | /* Returns non-zero if quota exceeded. */ | 212 | /* Returns non-zero if quota exceeded. */ |
@@ -225,7 +285,6 @@ void inet_twdr_hangman(unsigned long data) | |||
225 | out: | 285 | out: |
226 | spin_unlock(&twdr->death_lock); | 286 | spin_unlock(&twdr->death_lock); |
227 | } | 287 | } |
228 | |||
229 | EXPORT_SYMBOL_GPL(inet_twdr_hangman); | 288 | EXPORT_SYMBOL_GPL(inet_twdr_hangman); |
230 | 289 | ||
231 | void inet_twdr_twkill_work(struct work_struct *work) | 290 | void inet_twdr_twkill_work(struct work_struct *work) |
@@ -256,7 +315,6 @@ void inet_twdr_twkill_work(struct work_struct *work) | |||
256 | spin_unlock_bh(&twdr->death_lock); | 315 | spin_unlock_bh(&twdr->death_lock); |
257 | } | 316 | } |
258 | } | 317 | } |
259 | |||
260 | EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); | 318 | EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); |
261 | 319 | ||
262 | /* These are always called from BH context. See callers in | 320 | /* These are always called from BH context. See callers in |
@@ -276,7 +334,6 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw, | |||
276 | spin_unlock(&twdr->death_lock); | 334 | spin_unlock(&twdr->death_lock); |
277 | __inet_twsk_kill(tw, twdr->hashinfo); | 335 | __inet_twsk_kill(tw, twdr->hashinfo); |
278 | } | 336 | } |
279 | |||
280 | EXPORT_SYMBOL(inet_twsk_deschedule); | 337 | EXPORT_SYMBOL(inet_twsk_deschedule); |
281 | 338 | ||
282 | void inet_twsk_schedule(struct inet_timewait_sock *tw, | 339 | void inet_twsk_schedule(struct inet_timewait_sock *tw, |
@@ -357,7 +414,6 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, | |||
357 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); | 414 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); |
358 | spin_unlock(&twdr->death_lock); | 415 | spin_unlock(&twdr->death_lock); |
359 | } | 416 | } |
360 | |||
361 | EXPORT_SYMBOL_GPL(inet_twsk_schedule); | 417 | EXPORT_SYMBOL_GPL(inet_twsk_schedule); |
362 | 418 | ||
363 | void inet_twdr_twcal_tick(unsigned long data) | 419 | void inet_twdr_twcal_tick(unsigned long data) |
@@ -418,40 +474,48 @@ out: | |||
418 | #endif | 474 | #endif |
419 | spin_unlock(&twdr->death_lock); | 475 | spin_unlock(&twdr->death_lock); |
420 | } | 476 | } |
421 | |||
422 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); | 477 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); |
423 | 478 | ||
424 | void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, | 479 | void inet_twsk_purge(struct inet_hashinfo *hashinfo, |
425 | struct inet_timewait_death_row *twdr, int family) | 480 | struct inet_timewait_death_row *twdr, int family) |
426 | { | 481 | { |
427 | struct inet_timewait_sock *tw; | 482 | struct inet_timewait_sock *tw; |
428 | struct sock *sk; | 483 | struct sock *sk; |
429 | struct hlist_nulls_node *node; | 484 | struct hlist_nulls_node *node; |
430 | int h; | 485 | unsigned int slot; |
431 | 486 | ||
432 | local_bh_disable(); | 487 | for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { |
433 | for (h = 0; h < (hashinfo->ehash_size); h++) { | 488 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
434 | struct inet_ehash_bucket *head = | 489 | restart_rcu: |
435 | inet_ehash_bucket(hashinfo, h); | 490 | rcu_read_lock(); |
436 | spinlock_t *lock = inet_ehash_lockp(hashinfo, h); | ||
437 | restart: | 491 | restart: |
438 | spin_lock(lock); | 492 | sk_nulls_for_each_rcu(sk, node, &head->twchain) { |
439 | sk_nulls_for_each(sk, node, &head->twchain) { | ||
440 | |||
441 | tw = inet_twsk(sk); | 493 | tw = inet_twsk(sk); |
442 | if (!net_eq(twsk_net(tw), net) || | 494 | if ((tw->tw_family != family) || |
443 | tw->tw_family != family) | 495 | atomic_read(&twsk_net(tw)->count)) |
444 | continue; | 496 | continue; |
445 | 497 | ||
446 | atomic_inc(&tw->tw_refcnt); | 498 | if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) |
447 | spin_unlock(lock); | 499 | continue; |
500 | |||
501 | if (unlikely((tw->tw_family != family) || | ||
502 | atomic_read(&twsk_net(tw)->count))) { | ||
503 | inet_twsk_put(tw); | ||
504 | goto restart; | ||
505 | } | ||
506 | |||
507 | rcu_read_unlock(); | ||
448 | inet_twsk_deschedule(tw, twdr); | 508 | inet_twsk_deschedule(tw, twdr); |
449 | inet_twsk_put(tw); | 509 | inet_twsk_put(tw); |
450 | 510 | goto restart_rcu; | |
451 | goto restart; | ||
452 | } | 511 | } |
453 | spin_unlock(lock); | 512 | /* If the nulls value we got at the end of this lookup is |
513 | * not the expected one, we must restart lookup. | ||
514 | * We probably met an item that was moved to another chain. | ||
515 | */ | ||
516 | if (get_nulls_value(node) != slot) | ||
517 | goto restart; | ||
518 | rcu_read_unlock(); | ||
454 | } | 519 | } |
455 | local_bh_enable(); | ||
456 | } | 520 | } |
457 | EXPORT_SYMBOL_GPL(inet_twsk_purge); | 521 | EXPORT_SYMBOL_GPL(inet_twsk_purge); |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b1fbe18feb5a..6bcfe52a9c87 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -67,9 +67,6 @@ | |||
67 | * ip_id_count: idlock | 67 | * ip_id_count: idlock |
68 | */ | 68 | */ |
69 | 69 | ||
70 | /* Exported for inet_getid inline function. */ | ||
71 | DEFINE_SPINLOCK(inet_peer_idlock); | ||
72 | |||
73 | static struct kmem_cache *peer_cachep __read_mostly; | 70 | static struct kmem_cache *peer_cachep __read_mostly; |
74 | 71 | ||
75 | #define node_height(x) x->avl_height | 72 | #define node_height(x) x->avl_height |
@@ -390,7 +387,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) | |||
390 | n->v4daddr = daddr; | 387 | n->v4daddr = daddr; |
391 | atomic_set(&n->refcnt, 1); | 388 | atomic_set(&n->refcnt, 1); |
392 | atomic_set(&n->rid, 0); | 389 | atomic_set(&n->rid, 0); |
393 | n->ip_id_count = secure_ip_id(daddr); | 390 | atomic_set(&n->ip_id_count, secure_ip_id(daddr)); |
394 | n->tcp_ts_stamp = 0; | 391 | n->tcp_ts_stamp = 0; |
395 | 392 | ||
396 | write_lock_bh(&peer_pool_lock); | 393 | write_lock_bh(&peer_pool_lock); |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index a2991bc8e32e..af10942b326c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/ip.h> | 25 | #include <linux/ip.h> |
26 | #include <linux/icmp.h> | 26 | #include <linux/icmp.h> |
27 | #include <linux/netdevice.h> | 27 | #include <linux/netdevice.h> |
28 | #include <linux/slab.h> | ||
28 | #include <net/sock.h> | 29 | #include <net/sock.h> |
29 | #include <net/ip.h> | 30 | #include <net/ip.h> |
30 | #include <net/tcp.h> | 31 | #include <net/tcp.h> |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d3fe10be7219..75347ea70ea0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -32,6 +32,9 @@ | |||
32 | #include <linux/netdevice.h> | 32 | #include <linux/netdevice.h> |
33 | #include <linux/jhash.h> | 33 | #include <linux/jhash.h> |
34 | #include <linux/random.h> | 34 | #include <linux/random.h> |
35 | #include <linux/slab.h> | ||
36 | #include <net/route.h> | ||
37 | #include <net/dst.h> | ||
35 | #include <net/sock.h> | 38 | #include <net/sock.h> |
36 | #include <net/ip.h> | 39 | #include <net/ip.h> |
37 | #include <net/icmp.h> | 40 | #include <net/icmp.h> |
@@ -205,11 +208,35 @@ static void ip_expire(unsigned long arg) | |||
205 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { | 208 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { |
206 | struct sk_buff *head = qp->q.fragments; | 209 | struct sk_buff *head = qp->q.fragments; |
207 | 210 | ||
208 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | 211 | rcu_read_lock(); |
209 | if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { | 212 | head->dev = dev_get_by_index_rcu(net, qp->iif); |
210 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | 213 | if (!head->dev) |
211 | dev_put(head->dev); | 214 | goto out_rcu_unlock; |
215 | |||
216 | /* | ||
217 | * Only search router table for the head fragment, | ||
218 | * when defraging timeout at PRE_ROUTING HOOK. | ||
219 | */ | ||
220 | if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { | ||
221 | const struct iphdr *iph = ip_hdr(head); | ||
222 | int err = ip_route_input(head, iph->daddr, iph->saddr, | ||
223 | iph->tos, head->dev); | ||
224 | if (unlikely(err)) | ||
225 | goto out_rcu_unlock; | ||
226 | |||
227 | /* | ||
228 | * Only an end host needs to send an ICMP | ||
229 | * "Fragment Reassembly Timeout" message, per RFC792. | ||
230 | */ | ||
231 | if (skb_rtable(head)->rt_type != RTN_LOCAL) | ||
232 | goto out_rcu_unlock; | ||
233 | |||
212 | } | 234 | } |
235 | |||
236 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | ||
237 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | ||
238 | out_rcu_unlock: | ||
239 | rcu_read_unlock(); | ||
213 | } | 240 | } |
214 | out: | 241 | out: |
215 | spin_unlock(&qp->q.lock); | 242 | spin_unlock(&qp->q.lock); |
@@ -603,7 +630,6 @@ static int zero; | |||
603 | 630 | ||
604 | static struct ctl_table ip4_frags_ns_ctl_table[] = { | 631 | static struct ctl_table ip4_frags_ns_ctl_table[] = { |
605 | { | 632 | { |
606 | .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, | ||
607 | .procname = "ipfrag_high_thresh", | 633 | .procname = "ipfrag_high_thresh", |
608 | .data = &init_net.ipv4.frags.high_thresh, | 634 | .data = &init_net.ipv4.frags.high_thresh, |
609 | .maxlen = sizeof(int), | 635 | .maxlen = sizeof(int), |
@@ -611,7 +637,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
611 | .proc_handler = proc_dointvec | 637 | .proc_handler = proc_dointvec |
612 | }, | 638 | }, |
613 | { | 639 | { |
614 | .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, | ||
615 | .procname = "ipfrag_low_thresh", | 640 | .procname = "ipfrag_low_thresh", |
616 | .data = &init_net.ipv4.frags.low_thresh, | 641 | .data = &init_net.ipv4.frags.low_thresh, |
617 | .maxlen = sizeof(int), | 642 | .maxlen = sizeof(int), |
@@ -619,26 +644,22 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
619 | .proc_handler = proc_dointvec | 644 | .proc_handler = proc_dointvec |
620 | }, | 645 | }, |
621 | { | 646 | { |
622 | .ctl_name = NET_IPV4_IPFRAG_TIME, | ||
623 | .procname = "ipfrag_time", | 647 | .procname = "ipfrag_time", |
624 | .data = &init_net.ipv4.frags.timeout, | 648 | .data = &init_net.ipv4.frags.timeout, |
625 | .maxlen = sizeof(int), | 649 | .maxlen = sizeof(int), |
626 | .mode = 0644, | 650 | .mode = 0644, |
627 | .proc_handler = proc_dointvec_jiffies, | 651 | .proc_handler = proc_dointvec_jiffies, |
628 | .strategy = sysctl_jiffies | ||
629 | }, | 652 | }, |
630 | { } | 653 | { } |
631 | }; | 654 | }; |
632 | 655 | ||
633 | static struct ctl_table ip4_frags_ctl_table[] = { | 656 | static struct ctl_table ip4_frags_ctl_table[] = { |
634 | { | 657 | { |
635 | .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, | ||
636 | .procname = "ipfrag_secret_interval", | 658 | .procname = "ipfrag_secret_interval", |
637 | .data = &ip4_frags.secret_interval, | 659 | .data = &ip4_frags.secret_interval, |
638 | .maxlen = sizeof(int), | 660 | .maxlen = sizeof(int), |
639 | .mode = 0644, | 661 | .mode = 0644, |
640 | .proc_handler = proc_dointvec_jiffies, | 662 | .proc_handler = proc_dointvec_jiffies, |
641 | .strategy = sysctl_jiffies | ||
642 | }, | 663 | }, |
643 | { | 664 | { |
644 | .procname = "ipfrag_max_dist", | 665 | .procname = "ipfrag_max_dist", |
@@ -651,13 +672,13 @@ static struct ctl_table ip4_frags_ctl_table[] = { | |||
651 | { } | 672 | { } |
652 | }; | 673 | }; |
653 | 674 | ||
654 | static int ip4_frags_ns_ctl_register(struct net *net) | 675 | static int __net_init ip4_frags_ns_ctl_register(struct net *net) |
655 | { | 676 | { |
656 | struct ctl_table *table; | 677 | struct ctl_table *table; |
657 | struct ctl_table_header *hdr; | 678 | struct ctl_table_header *hdr; |
658 | 679 | ||
659 | table = ip4_frags_ns_ctl_table; | 680 | table = ip4_frags_ns_ctl_table; |
660 | if (net != &init_net) { | 681 | if (!net_eq(net, &init_net)) { |
661 | table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); | 682 | table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); |
662 | if (table == NULL) | 683 | if (table == NULL) |
663 | goto err_alloc; | 684 | goto err_alloc; |
@@ -675,13 +696,13 @@ static int ip4_frags_ns_ctl_register(struct net *net) | |||
675 | return 0; | 696 | return 0; |
676 | 697 | ||
677 | err_reg: | 698 | err_reg: |
678 | if (net != &init_net) | 699 | if (!net_eq(net, &init_net)) |
679 | kfree(table); | 700 | kfree(table); |
680 | err_alloc: | 701 | err_alloc: |
681 | return -ENOMEM; | 702 | return -ENOMEM; |
682 | } | 703 | } |
683 | 704 | ||
684 | static void ip4_frags_ns_ctl_unregister(struct net *net) | 705 | static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) |
685 | { | 706 | { |
686 | struct ctl_table *table; | 707 | struct ctl_table *table; |
687 | 708 | ||
@@ -709,7 +730,7 @@ static inline void ip4_frags_ctl_register(void) | |||
709 | } | 730 | } |
710 | #endif | 731 | #endif |
711 | 732 | ||
712 | static int ipv4_frags_init_net(struct net *net) | 733 | static int __net_init ipv4_frags_init_net(struct net *net) |
713 | { | 734 | { |
714 | /* | 735 | /* |
715 | * Fragment cache limits. We will commit 256K at one time. Should we | 736 | * Fragment cache limits. We will commit 256K at one time. Should we |
@@ -731,7 +752,7 @@ static int ipv4_frags_init_net(struct net *net) | |||
731 | return ip4_frags_ns_ctl_register(net); | 752 | return ip4_frags_ns_ctl_register(net); |
732 | } | 753 | } |
733 | 754 | ||
734 | static void ipv4_frags_exit_net(struct net *net) | 755 | static void __net_exit ipv4_frags_exit_net(struct net *net) |
735 | { | 756 | { |
736 | ip4_frags_ns_ctl_unregister(net); | 757 | ip4_frags_ns_ctl_unregister(net); |
737 | inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); | 758 | inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 143333852624..fe381d12ecdd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/slab.h> | ||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
19 | #include <linux/netdevice.h> | 20 | #include <linux/netdevice.h> |
@@ -125,7 +126,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); | |||
125 | 126 | ||
126 | #define HASH_SIZE 16 | 127 | #define HASH_SIZE 16 |
127 | 128 | ||
128 | static int ipgre_net_id; | 129 | static int ipgre_net_id __read_mostly; |
129 | struct ipgre_net { | 130 | struct ipgre_net { |
130 | struct ip_tunnel *tunnels[4][HASH_SIZE]; | 131 | struct ip_tunnel *tunnels[4][HASH_SIZE]; |
131 | 132 | ||
@@ -156,8 +157,13 @@ struct ipgre_net { | |||
156 | #define tunnels_r tunnels[2] | 157 | #define tunnels_r tunnels[2] |
157 | #define tunnels_l tunnels[1] | 158 | #define tunnels_l tunnels[1] |
158 | #define tunnels_wc tunnels[0] | 159 | #define tunnels_wc tunnels[0] |
160 | /* | ||
161 | * Locking : hash tables are protected by RCU and a spinlock | ||
162 | */ | ||
163 | static DEFINE_SPINLOCK(ipgre_lock); | ||
159 | 164 | ||
160 | static DEFINE_RWLOCK(ipgre_lock); | 165 | #define for_each_ip_tunnel_rcu(start) \ |
166 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | ||
161 | 167 | ||
162 | /* Given src, dst and key, find appropriate for input tunnel. */ | 168 | /* Given src, dst and key, find appropriate for input tunnel. */ |
163 | 169 | ||
@@ -175,7 +181,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | |||
175 | ARPHRD_ETHER : ARPHRD_IPGRE; | 181 | ARPHRD_ETHER : ARPHRD_IPGRE; |
176 | int score, cand_score = 4; | 182 | int score, cand_score = 4; |
177 | 183 | ||
178 | for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { | 184 | for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { |
179 | if (local != t->parms.iph.saddr || | 185 | if (local != t->parms.iph.saddr || |
180 | remote != t->parms.iph.daddr || | 186 | remote != t->parms.iph.daddr || |
181 | key != t->parms.i_key || | 187 | key != t->parms.i_key || |
@@ -200,7 +206,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | |||
200 | } | 206 | } |
201 | } | 207 | } |
202 | 208 | ||
203 | for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { | 209 | for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { |
204 | if (remote != t->parms.iph.daddr || | 210 | if (remote != t->parms.iph.daddr || |
205 | key != t->parms.i_key || | 211 | key != t->parms.i_key || |
206 | !(t->dev->flags & IFF_UP)) | 212 | !(t->dev->flags & IFF_UP)) |
@@ -224,7 +230,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | |||
224 | } | 230 | } |
225 | } | 231 | } |
226 | 232 | ||
227 | for (t = ign->tunnels_l[h1]; t; t = t->next) { | 233 | for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { |
228 | if ((local != t->parms.iph.saddr && | 234 | if ((local != t->parms.iph.saddr && |
229 | (local != t->parms.iph.daddr || | 235 | (local != t->parms.iph.daddr || |
230 | !ipv4_is_multicast(local))) || | 236 | !ipv4_is_multicast(local))) || |
@@ -250,7 +256,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | |||
250 | } | 256 | } |
251 | } | 257 | } |
252 | 258 | ||
253 | for (t = ign->tunnels_wc[h1]; t; t = t->next) { | 259 | for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { |
254 | if (t->parms.i_key != key || | 260 | if (t->parms.i_key != key || |
255 | !(t->dev->flags & IFF_UP)) | 261 | !(t->dev->flags & IFF_UP)) |
256 | continue; | 262 | continue; |
@@ -276,8 +282,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | |||
276 | if (cand != NULL) | 282 | if (cand != NULL) |
277 | return cand; | 283 | return cand; |
278 | 284 | ||
279 | if (ign->fb_tunnel_dev->flags & IFF_UP) | 285 | dev = ign->fb_tunnel_dev; |
280 | return netdev_priv(ign->fb_tunnel_dev); | 286 | if (dev->flags & IFF_UP) |
287 | return netdev_priv(dev); | ||
281 | 288 | ||
282 | return NULL; | 289 | return NULL; |
283 | } | 290 | } |
@@ -311,10 +318,10 @@ static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) | |||
311 | { | 318 | { |
312 | struct ip_tunnel **tp = ipgre_bucket(ign, t); | 319 | struct ip_tunnel **tp = ipgre_bucket(ign, t); |
313 | 320 | ||
321 | spin_lock_bh(&ipgre_lock); | ||
314 | t->next = *tp; | 322 | t->next = *tp; |
315 | write_lock_bh(&ipgre_lock); | 323 | rcu_assign_pointer(*tp, t); |
316 | *tp = t; | 324 | spin_unlock_bh(&ipgre_lock); |
317 | write_unlock_bh(&ipgre_lock); | ||
318 | } | 325 | } |
319 | 326 | ||
320 | static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) | 327 | static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) |
@@ -323,9 +330,9 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) | |||
323 | 330 | ||
324 | for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { | 331 | for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { |
325 | if (t == *tp) { | 332 | if (t == *tp) { |
326 | write_lock_bh(&ipgre_lock); | 333 | spin_lock_bh(&ipgre_lock); |
327 | *tp = t->next; | 334 | *tp = t->next; |
328 | write_unlock_bh(&ipgre_lock); | 335 | spin_unlock_bh(&ipgre_lock); |
329 | break; | 336 | break; |
330 | } | 337 | } |
331 | } | 338 | } |
@@ -476,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
476 | break; | 483 | break; |
477 | } | 484 | } |
478 | 485 | ||
479 | read_lock(&ipgre_lock); | 486 | rcu_read_lock(); |
480 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, | 487 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, |
481 | flags & GRE_KEY ? | 488 | flags & GRE_KEY ? |
482 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, | 489 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, |
@@ -494,7 +501,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
494 | t->err_count = 1; | 501 | t->err_count = 1; |
495 | t->err_time = jiffies; | 502 | t->err_time = jiffies; |
496 | out: | 503 | out: |
497 | read_unlock(&ipgre_lock); | 504 | rcu_read_unlock(); |
498 | return; | 505 | return; |
499 | } | 506 | } |
500 | 507 | ||
@@ -573,7 +580,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
573 | 580 | ||
574 | gre_proto = *(__be16 *)(h + 2); | 581 | gre_proto = *(__be16 *)(h + 2); |
575 | 582 | ||
576 | read_lock(&ipgre_lock); | 583 | rcu_read_lock(); |
577 | if ((tunnel = ipgre_tunnel_lookup(skb->dev, | 584 | if ((tunnel = ipgre_tunnel_lookup(skb->dev, |
578 | iph->saddr, iph->daddr, key, | 585 | iph->saddr, iph->daddr, key, |
579 | gre_proto))) { | 586 | gre_proto))) { |
@@ -647,13 +654,13 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
647 | ipgre_ecn_decapsulate(iph, skb); | 654 | ipgre_ecn_decapsulate(iph, skb); |
648 | 655 | ||
649 | netif_rx(skb); | 656 | netif_rx(skb); |
650 | read_unlock(&ipgre_lock); | 657 | rcu_read_unlock(); |
651 | return(0); | 658 | return(0); |
652 | } | 659 | } |
653 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 660 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
654 | 661 | ||
655 | drop: | 662 | drop: |
656 | read_unlock(&ipgre_lock); | 663 | rcu_read_unlock(); |
657 | drop_nolock: | 664 | drop_nolock: |
658 | kfree_skb(skb); | 665 | kfree_skb(skb); |
659 | return(0); | 666 | return(0); |
@@ -662,7 +669,8 @@ drop_nolock: | |||
662 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 669 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
663 | { | 670 | { |
664 | struct ip_tunnel *tunnel = netdev_priv(dev); | 671 | struct ip_tunnel *tunnel = netdev_priv(dev); |
665 | struct net_device_stats *stats = &tunnel->dev->stats; | 672 | struct net_device_stats *stats = &dev->stats; |
673 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); | ||
666 | struct iphdr *old_iph = ip_hdr(skb); | 674 | struct iphdr *old_iph = ip_hdr(skb); |
667 | struct iphdr *tiph; | 675 | struct iphdr *tiph; |
668 | u8 tos; | 676 | u8 tos; |
@@ -786,7 +794,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
786 | } | 794 | } |
787 | 795 | ||
788 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { | 796 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { |
789 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); | 797 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
790 | ip_rt_put(rt); | 798 | ip_rt_put(rt); |
791 | goto tx_error; | 799 | goto tx_error; |
792 | } | 800 | } |
@@ -803,14 +811,16 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
803 | tunnel->err_count = 0; | 811 | tunnel->err_count = 0; |
804 | } | 812 | } |
805 | 813 | ||
806 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; | 814 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; |
807 | 815 | ||
808 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| | 816 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| |
809 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 817 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
810 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 818 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
819 | if (max_headroom > dev->needed_headroom) | ||
820 | dev->needed_headroom = max_headroom; | ||
811 | if (!new_skb) { | 821 | if (!new_skb) { |
812 | ip_rt_put(rt); | 822 | ip_rt_put(rt); |
813 | stats->tx_dropped++; | 823 | txq->tx_dropped++; |
814 | dev_kfree_skb(skb); | 824 | dev_kfree_skb(skb); |
815 | return NETDEV_TX_OK; | 825 | return NETDEV_TX_OK; |
816 | } | 826 | } |
@@ -1137,12 +1147,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | |||
1137 | 1147 | ||
1138 | if (saddr) | 1148 | if (saddr) |
1139 | memcpy(&iph->saddr, saddr, 4); | 1149 | memcpy(&iph->saddr, saddr, 4); |
1140 | 1150 | if (daddr) | |
1141 | if (daddr) { | ||
1142 | memcpy(&iph->daddr, daddr, 4); | 1151 | memcpy(&iph->daddr, daddr, 4); |
1143 | return t->hlen; | 1152 | if (iph->daddr) |
1144 | } | ||
1145 | if (iph->daddr && !ipv4_is_multicast(iph->daddr)) | ||
1146 | return t->hlen; | 1153 | return t->hlen; |
1147 | 1154 | ||
1148 | return -t->hlen; | 1155 | return -t->hlen; |
@@ -1283,33 +1290,27 @@ static const struct net_protocol ipgre_protocol = { | |||
1283 | .netns_ok = 1, | 1290 | .netns_ok = 1, |
1284 | }; | 1291 | }; |
1285 | 1292 | ||
1286 | static void ipgre_destroy_tunnels(struct ipgre_net *ign) | 1293 | static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) |
1287 | { | 1294 | { |
1288 | int prio; | 1295 | int prio; |
1289 | 1296 | ||
1290 | for (prio = 0; prio < 4; prio++) { | 1297 | for (prio = 0; prio < 4; prio++) { |
1291 | int h; | 1298 | int h; |
1292 | for (h = 0; h < HASH_SIZE; h++) { | 1299 | for (h = 0; h < HASH_SIZE; h++) { |
1293 | struct ip_tunnel *t; | 1300 | struct ip_tunnel *t = ign->tunnels[prio][h]; |
1294 | while ((t = ign->tunnels[prio][h]) != NULL) | 1301 | |
1295 | unregister_netdevice(t->dev); | 1302 | while (t != NULL) { |
1303 | unregister_netdevice_queue(t->dev, head); | ||
1304 | t = t->next; | ||
1305 | } | ||
1296 | } | 1306 | } |
1297 | } | 1307 | } |
1298 | } | 1308 | } |
1299 | 1309 | ||
1300 | static int ipgre_init_net(struct net *net) | 1310 | static int __net_init ipgre_init_net(struct net *net) |
1301 | { | 1311 | { |
1312 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | ||
1302 | int err; | 1313 | int err; |
1303 | struct ipgre_net *ign; | ||
1304 | |||
1305 | err = -ENOMEM; | ||
1306 | ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); | ||
1307 | if (ign == NULL) | ||
1308 | goto err_alloc; | ||
1309 | |||
1310 | err = net_assign_generic(net, ipgre_net_id, ign); | ||
1311 | if (err < 0) | ||
1312 | goto err_assign; | ||
1313 | 1314 | ||
1314 | ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", | 1315 | ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", |
1315 | ipgre_tunnel_setup); | 1316 | ipgre_tunnel_setup); |
@@ -1330,27 +1331,26 @@ static int ipgre_init_net(struct net *net) | |||
1330 | err_reg_dev: | 1331 | err_reg_dev: |
1331 | free_netdev(ign->fb_tunnel_dev); | 1332 | free_netdev(ign->fb_tunnel_dev); |
1332 | err_alloc_dev: | 1333 | err_alloc_dev: |
1333 | /* nothing */ | ||
1334 | err_assign: | ||
1335 | kfree(ign); | ||
1336 | err_alloc: | ||
1337 | return err; | 1334 | return err; |
1338 | } | 1335 | } |
1339 | 1336 | ||
1340 | static void ipgre_exit_net(struct net *net) | 1337 | static void __net_exit ipgre_exit_net(struct net *net) |
1341 | { | 1338 | { |
1342 | struct ipgre_net *ign; | 1339 | struct ipgre_net *ign; |
1340 | LIST_HEAD(list); | ||
1343 | 1341 | ||
1344 | ign = net_generic(net, ipgre_net_id); | 1342 | ign = net_generic(net, ipgre_net_id); |
1345 | rtnl_lock(); | 1343 | rtnl_lock(); |
1346 | ipgre_destroy_tunnels(ign); | 1344 | ipgre_destroy_tunnels(ign, &list); |
1345 | unregister_netdevice_many(&list); | ||
1347 | rtnl_unlock(); | 1346 | rtnl_unlock(); |
1348 | kfree(ign); | ||
1349 | } | 1347 | } |
1350 | 1348 | ||
1351 | static struct pernet_operations ipgre_net_ops = { | 1349 | static struct pernet_operations ipgre_net_ops = { |
1352 | .init = ipgre_init_net, | 1350 | .init = ipgre_init_net, |
1353 | .exit = ipgre_exit_net, | 1351 | .exit = ipgre_exit_net, |
1352 | .id = &ipgre_net_id, | ||
1353 | .size = sizeof(struct ipgre_net), | ||
1354 | }; | 1354 | }; |
1355 | 1355 | ||
1356 | static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) | 1356 | static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) |
@@ -1471,7 +1471,7 @@ static void ipgre_tap_setup(struct net_device *dev) | |||
1471 | dev->features |= NETIF_F_NETNS_LOCAL; | 1471 | dev->features |= NETIF_F_NETNS_LOCAL; |
1472 | } | 1472 | } |
1473 | 1473 | ||
1474 | static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], | 1474 | static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], |
1475 | struct nlattr *data[]) | 1475 | struct nlattr *data[]) |
1476 | { | 1476 | { |
1477 | struct ip_tunnel *nt; | 1477 | struct ip_tunnel *nt; |
@@ -1665,15 +1665,16 @@ static int __init ipgre_init(void) | |||
1665 | 1665 | ||
1666 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); | 1666 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); |
1667 | 1667 | ||
1668 | if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { | 1668 | err = register_pernet_device(&ipgre_net_ops); |
1669 | if (err < 0) | ||
1670 | return err; | ||
1671 | |||
1672 | err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); | ||
1673 | if (err < 0) { | ||
1669 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | 1674 | printk(KERN_INFO "ipgre init: can't add protocol\n"); |
1670 | return -EAGAIN; | 1675 | goto add_proto_failed; |
1671 | } | 1676 | } |
1672 | 1677 | ||
1673 | err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); | ||
1674 | if (err < 0) | ||
1675 | goto gen_device_failed; | ||
1676 | |||
1677 | err = rtnl_link_register(&ipgre_link_ops); | 1678 | err = rtnl_link_register(&ipgre_link_ops); |
1678 | if (err < 0) | 1679 | if (err < 0) |
1679 | goto rtnl_link_failed; | 1680 | goto rtnl_link_failed; |
@@ -1688,9 +1689,9 @@ out: | |||
1688 | tap_ops_failed: | 1689 | tap_ops_failed: |
1689 | rtnl_link_unregister(&ipgre_link_ops); | 1690 | rtnl_link_unregister(&ipgre_link_ops); |
1690 | rtnl_link_failed: | 1691 | rtnl_link_failed: |
1691 | unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); | ||
1692 | gen_device_failed: | ||
1693 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); | 1692 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); |
1693 | add_proto_failed: | ||
1694 | unregister_pernet_device(&ipgre_net_ops); | ||
1694 | goto out; | 1695 | goto out; |
1695 | } | 1696 | } |
1696 | 1697 | ||
@@ -1698,9 +1699,9 @@ static void __exit ipgre_fini(void) | |||
1698 | { | 1699 | { |
1699 | rtnl_link_unregister(&ipgre_tap_ops); | 1700 | rtnl_link_unregister(&ipgre_tap_ops); |
1700 | rtnl_link_unregister(&ipgre_link_ops); | 1701 | rtnl_link_unregister(&ipgre_link_ops); |
1701 | unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); | ||
1702 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) | 1702 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) |
1703 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); | 1703 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); |
1704 | unregister_pernet_device(&ipgre_net_ops); | ||
1704 | } | 1705 | } |
1705 | 1706 | ||
1706 | module_init(ipgre_init); | 1707 | module_init(ipgre_init); |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6c98b43badf4..f8ab7a380d4a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -119,6 +119,7 @@ | |||
119 | #include <linux/kernel.h> | 119 | #include <linux/kernel.h> |
120 | #include <linux/string.h> | 120 | #include <linux/string.h> |
121 | #include <linux/errno.h> | 121 | #include <linux/errno.h> |
122 | #include <linux/slab.h> | ||
122 | 123 | ||
123 | #include <linux/net.h> | 124 | #include <linux/net.h> |
124 | #include <linux/socket.h> | 125 | #include <linux/socket.h> |
@@ -161,10 +162,10 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
161 | /* If socket is bound to an interface, only report | 162 | /* If socket is bound to an interface, only report |
162 | * the packet if it came from that interface. | 163 | * the packet if it came from that interface. |
163 | */ | 164 | */ |
164 | if (sk && inet_sk(sk)->num == protocol && | 165 | if (sk && inet_sk(sk)->inet_num == protocol && |
165 | (!sk->sk_bound_dev_if || | 166 | (!sk->sk_bound_dev_if || |
166 | sk->sk_bound_dev_if == dev->ifindex) && | 167 | sk->sk_bound_dev_if == dev->ifindex) && |
167 | sock_net(sk) == dev_net(dev)) { | 168 | net_eq(sock_net(sk), dev_net(dev))) { |
168 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 169 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
169 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { | 170 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { |
170 | read_unlock(&ip_ra_lock); | 171 | read_unlock(&ip_ra_lock); |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 94bf105ef3c9..4c09a31fd140 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/capability.h> | 12 | #include <linux/capability.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/slab.h> | ||
14 | #include <linux/types.h> | 15 | #include <linux/types.h> |
15 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f9895180f481..d1bcc9f21d4f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/string.h> | 51 | #include <linux/string.h> |
52 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
53 | #include <linux/highmem.h> | 53 | #include <linux/highmem.h> |
54 | #include <linux/slab.h> | ||
54 | 55 | ||
55 | #include <linux/socket.h> | 56 | #include <linux/socket.h> |
56 | #include <linux/sockios.h> | 57 | #include <linux/sockios.h> |
@@ -119,7 +120,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) | |||
119 | newskb->pkt_type = PACKET_LOOPBACK; | 120 | newskb->pkt_type = PACKET_LOOPBACK; |
120 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | 121 | newskb->ip_summed = CHECKSUM_UNNECESSARY; |
121 | WARN_ON(!skb_dst(newskb)); | 122 | WARN_ON(!skb_dst(newskb)); |
122 | netif_rx(newskb); | 123 | netif_rx_ni(newskb); |
123 | return 0; | 124 | return 0; |
124 | } | 125 | } |
125 | 126 | ||
@@ -254,7 +255,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
254 | */ | 255 | */ |
255 | 256 | ||
256 | if (rt->rt_flags&RTCF_MULTICAST) { | 257 | if (rt->rt_flags&RTCF_MULTICAST) { |
257 | if ((!sk || inet_sk(sk)->mc_loop) | 258 | if (sk_mc_loop(sk) |
258 | #ifdef CONFIG_IP_MROUTE | 259 | #ifdef CONFIG_IP_MROUTE |
259 | /* Small optimization: do not loopback not local frames, | 260 | /* Small optimization: do not loopback not local frames, |
260 | which returned after forwarding; they will be dropped | 261 | which returned after forwarding; they will be dropped |
@@ -264,9 +265,11 @@ int ip_mc_output(struct sk_buff *skb) | |||
264 | 265 | ||
265 | This check is duplicated in ip_mr_input at the moment. | 266 | This check is duplicated in ip_mr_input at the moment. |
266 | */ | 267 | */ |
267 | && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED)) | 268 | && |
269 | ((rt->rt_flags & RTCF_LOCAL) || | ||
270 | !(IPCB(skb)->flags & IPSKB_FORWARDED)) | ||
268 | #endif | 271 | #endif |
269 | ) { | 272 | ) { |
270 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); | 273 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); |
271 | if (newskb) | 274 | if (newskb) |
272 | NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, | 275 | NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, |
@@ -329,7 +332,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
329 | __be32 daddr; | 332 | __be32 daddr; |
330 | 333 | ||
331 | /* Use correct destination address if we have options. */ | 334 | /* Use correct destination address if we have options. */ |
332 | daddr = inet->daddr; | 335 | daddr = inet->inet_daddr; |
333 | if(opt && opt->srr) | 336 | if(opt && opt->srr) |
334 | daddr = opt->faddr; | 337 | daddr = opt->faddr; |
335 | 338 | ||
@@ -338,13 +341,13 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
338 | .mark = sk->sk_mark, | 341 | .mark = sk->sk_mark, |
339 | .nl_u = { .ip4_u = | 342 | .nl_u = { .ip4_u = |
340 | { .daddr = daddr, | 343 | { .daddr = daddr, |
341 | .saddr = inet->saddr, | 344 | .saddr = inet->inet_saddr, |
342 | .tos = RT_CONN_FLAGS(sk) } }, | 345 | .tos = RT_CONN_FLAGS(sk) } }, |
343 | .proto = sk->sk_protocol, | 346 | .proto = sk->sk_protocol, |
344 | .flags = inet_sk_flowi_flags(sk), | 347 | .flags = inet_sk_flowi_flags(sk), |
345 | .uli_u = { .ports = | 348 | .uli_u = { .ports = |
346 | { .sport = inet->sport, | 349 | { .sport = inet->inet_sport, |
347 | .dport = inet->dport } } }; | 350 | .dport = inet->inet_dport } } }; |
348 | 351 | ||
349 | /* If this fails, retransmit mechanism of transport layer will | 352 | /* If this fails, retransmit mechanism of transport layer will |
350 | * keep trying until route appears or the connection times | 353 | * keep trying until route appears or the connection times |
@@ -379,7 +382,7 @@ packet_routed: | |||
379 | 382 | ||
380 | if (opt && opt->optlen) { | 383 | if (opt && opt->optlen) { |
381 | iph->ihl += opt->optlen >> 2; | 384 | iph->ihl += opt->optlen >> 2; |
382 | ip_options_build(skb, opt, inet->daddr, rt, 0); | 385 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); |
383 | } | 386 | } |
384 | 387 | ||
385 | ip_select_ident_more(iph, &rt->u.dst, sk, | 388 | ip_select_ident_more(iph, &rt->u.dst, sk, |
@@ -501,8 +504,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
501 | if (skb->sk) { | 504 | if (skb->sk) { |
502 | frag->sk = skb->sk; | 505 | frag->sk = skb->sk; |
503 | frag->destructor = sock_wfree; | 506 | frag->destructor = sock_wfree; |
504 | truesizes += frag->truesize; | ||
505 | } | 507 | } |
508 | truesizes += frag->truesize; | ||
506 | } | 509 | } |
507 | 510 | ||
508 | /* Everything is OK. Generate! */ | 511 | /* Everything is OK. Generate! */ |
@@ -846,7 +849,8 @@ int ip_append_data(struct sock *sk, | |||
846 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 849 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
847 | 850 | ||
848 | if (inet->cork.length + length > 0xFFFF - fragheaderlen) { | 851 | if (inet->cork.length + length > 0xFFFF - fragheaderlen) { |
849 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); | 852 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
853 | mtu-exthdrlen); | ||
850 | return -EMSGSIZE; | 854 | return -EMSGSIZE; |
851 | } | 855 | } |
852 | 856 | ||
@@ -1100,7 +1104,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1100 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 1104 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
1101 | 1105 | ||
1102 | if (inet->cork.length + size > 0xFFFF - fragheaderlen) { | 1106 | if (inet->cork.length + size > 0xFFFF - fragheaderlen) { |
1103 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); | 1107 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); |
1104 | return -EMSGSIZE; | 1108 | return -EMSGSIZE; |
1105 | } | 1109 | } |
1106 | 1110 | ||
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e982b5c1ee17..1e64dabbd232 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/icmp.h> | 23 | #include <linux/icmp.h> |
24 | #include <linux/inetdevice.h> | 24 | #include <linux/inetdevice.h> |
25 | #include <linux/netdevice.h> | 25 | #include <linux/netdevice.h> |
26 | #include <linux/slab.h> | ||
26 | #include <net/sock.h> | 27 | #include <net/sock.h> |
27 | #include <net/ip.h> | 28 | #include <net/ip.h> |
28 | #include <net/icmp.h> | 29 | #include <net/icmp.h> |
@@ -245,7 +246,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, | |||
245 | { | 246 | { |
246 | struct ip_ra_chain *ra, *new_ra, **rap; | 247 | struct ip_ra_chain *ra, *new_ra, **rap; |
247 | 248 | ||
248 | if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) | 249 | if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) |
249 | return -EINVAL; | 250 | return -EINVAL; |
250 | 251 | ||
251 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; | 252 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; |
@@ -451,7 +452,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
451 | (1<<IP_TTL) | (1<<IP_HDRINCL) | | 452 | (1<<IP_TTL) | (1<<IP_HDRINCL) | |
452 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | | 453 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | |
453 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | | 454 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | |
454 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || | 455 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | |
456 | (1<<IP_MINTTL))) || | ||
455 | optname == IP_MULTICAST_TTL || | 457 | optname == IP_MULTICAST_TTL || |
456 | optname == IP_MULTICAST_ALL || | 458 | optname == IP_MULTICAST_ALL || |
457 | optname == IP_MULTICAST_LOOP || | 459 | optname == IP_MULTICAST_LOOP || |
@@ -480,7 +482,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
480 | case IP_OPTIONS: | 482 | case IP_OPTIONS: |
481 | { | 483 | { |
482 | struct ip_options *opt = NULL; | 484 | struct ip_options *opt = NULL; |
483 | if (optlen > 40 || optlen < 0) | 485 | if (optlen > 40) |
484 | goto e_inval; | 486 | goto e_inval; |
485 | err = ip_options_get_from_user(sock_net(sk), &opt, | 487 | err = ip_options_get_from_user(sock_net(sk), &opt, |
486 | optval, optlen); | 488 | optval, optlen); |
@@ -492,7 +494,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
492 | if (sk->sk_family == PF_INET || | 494 | if (sk->sk_family == PF_INET || |
493 | (!((1 << sk->sk_state) & | 495 | (!((1 << sk->sk_state) & |
494 | (TCPF_LISTEN | TCPF_CLOSE)) && | 496 | (TCPF_LISTEN | TCPF_CLOSE)) && |
495 | inet->daddr != LOOPBACK4_IPV6)) { | 497 | inet->inet_daddr != LOOPBACK4_IPV6)) { |
496 | #endif | 498 | #endif |
497 | if (inet->opt) | 499 | if (inet->opt) |
498 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; | 500 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; |
@@ -575,7 +577,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
575 | inet->hdrincl = val ? 1 : 0; | 577 | inet->hdrincl = val ? 1 : 0; |
576 | break; | 578 | break; |
577 | case IP_MTU_DISCOVER: | 579 | case IP_MTU_DISCOVER: |
578 | if (val < 0 || val > 3) | 580 | if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) |
579 | goto e_inval; | 581 | goto e_inval; |
580 | inet->pmtudisc = val; | 582 | inet->pmtudisc = val; |
581 | break; | 583 | break; |
@@ -936,6 +938,14 @@ mc_msf_out: | |||
936 | inet->transparent = !!val; | 938 | inet->transparent = !!val; |
937 | break; | 939 | break; |
938 | 940 | ||
941 | case IP_MINTTL: | ||
942 | if (optlen < 1) | ||
943 | goto e_inval; | ||
944 | if (val < 0 || val > 255) | ||
945 | goto e_inval; | ||
946 | inet->min_ttl = val; | ||
947 | break; | ||
948 | |||
939 | default: | 949 | default: |
940 | err = -ENOPROTOOPT; | 950 | err = -ENOPROTOOPT; |
941 | break; | 951 | break; |
@@ -1180,8 +1190,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1180 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { | 1190 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { |
1181 | struct in_pktinfo info; | 1191 | struct in_pktinfo info; |
1182 | 1192 | ||
1183 | info.ipi_addr.s_addr = inet->rcv_saddr; | 1193 | info.ipi_addr.s_addr = inet->inet_rcv_saddr; |
1184 | info.ipi_spec_dst.s_addr = inet->rcv_saddr; | 1194 | info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; |
1185 | info.ipi_ifindex = inet->mc_index; | 1195 | info.ipi_ifindex = inet->mc_index; |
1186 | put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); | 1196 | put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); |
1187 | } | 1197 | } |
@@ -1198,6 +1208,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1198 | case IP_TRANSPARENT: | 1208 | case IP_TRANSPARENT: |
1199 | val = inet->transparent; | 1209 | val = inet->transparent; |
1200 | break; | 1210 | break; |
1211 | case IP_MINTTL: | ||
1212 | val = inet->min_ttl; | ||
1213 | break; | ||
1201 | default: | 1214 | default: |
1202 | release_sock(sk); | 1215 | release_sock(sk); |
1203 | return -ENOPROTOOPT; | 1216 | return -ENOPROTOOPT; |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150ae..629067571f02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | static void ipcomp4_err(struct sk_buff *skb, u32 info) | 26 | static void ipcomp4_err(struct sk_buff *skb, u32 info) |
27 | { | 27 | { |
28 | struct net *net = dev_net(skb->dev); | ||
28 | __be32 spi; | 29 | __be32 spi; |
29 | struct iphdr *iph = (struct iphdr *)skb->data; | 30 | struct iphdr *iph = (struct iphdr *)skb->data; |
30 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); | 31 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); |
@@ -35,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
35 | return; | 36 | return; |
36 | 37 | ||
37 | spi = htonl(ntohs(ipch->cpi)); | 38 | spi = htonl(ntohs(ipch->cpi)); |
38 | x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, | 39 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, |
39 | spi, IPPROTO_COMP, AF_INET); | 40 | spi, IPPROTO_COMP, AF_INET); |
40 | if (!x) | 41 | if (!x) |
41 | return; | 42 | return; |
@@ -47,9 +48,10 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
47 | /* We always hold one tunnel user reference to indicate a tunnel */ | 48 | /* We always hold one tunnel user reference to indicate a tunnel */ |
48 | static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | 49 | static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) |
49 | { | 50 | { |
51 | struct net *net = xs_net(x); | ||
50 | struct xfrm_state *t; | 52 | struct xfrm_state *t; |
51 | 53 | ||
52 | t = xfrm_state_alloc(&init_net); | 54 | t = xfrm_state_alloc(net); |
53 | if (t == NULL) | 55 | if (t == NULL) |
54 | goto out; | 56 | goto out; |
55 | 57 | ||
@@ -61,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | |||
61 | t->props.mode = x->props.mode; | 63 | t->props.mode = x->props.mode; |
62 | t->props.saddr.a4 = x->props.saddr.a4; | 64 | t->props.saddr.a4 = x->props.saddr.a4; |
63 | t->props.flags = x->props.flags; | 65 | t->props.flags = x->props.flags; |
66 | memcpy(&t->mark, &x->mark, sizeof(t->mark)); | ||
64 | 67 | ||
65 | if (xfrm_init_state(t)) | 68 | if (xfrm_init_state(t)) |
66 | goto error; | 69 | goto error; |
@@ -82,10 +85,12 @@ error: | |||
82 | */ | 85 | */ |
83 | static int ipcomp_tunnel_attach(struct xfrm_state *x) | 86 | static int ipcomp_tunnel_attach(struct xfrm_state *x) |
84 | { | 87 | { |
88 | struct net *net = xs_net(x); | ||
85 | int err = 0; | 89 | int err = 0; |
86 | struct xfrm_state *t; | 90 | struct xfrm_state *t; |
91 | u32 mark = x->mark.v & x->mark.m; | ||
87 | 92 | ||
88 | t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, | 93 | t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, |
89 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); | 94 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); |
90 | if (!t) { | 95 | if (!t) { |
91 | t = ipcomp_tunnel_create(x); | 96 | t = ipcomp_tunnel_create(x); |
@@ -124,16 +129,12 @@ static int ipcomp4_init_state(struct xfrm_state *x) | |||
124 | if (x->props.mode == XFRM_MODE_TUNNEL) { | 129 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
125 | err = ipcomp_tunnel_attach(x); | 130 | err = ipcomp_tunnel_attach(x); |
126 | if (err) | 131 | if (err) |
127 | goto error_tunnel; | 132 | goto out; |
128 | } | 133 | } |
129 | 134 | ||
130 | err = 0; | 135 | err = 0; |
131 | out: | 136 | out: |
132 | return err; | 137 | return err; |
133 | |||
134 | error_tunnel: | ||
135 | ipcomp_destroy(x); | ||
136 | goto out; | ||
137 | } | 138 | } |
138 | 139 | ||
139 | static const struct xfrm_type ipcomp_type = { | 140 | static const struct xfrm_type ipcomp_type = { |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f8d04c256454..067ce9e043dc 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/root_dev.h> | 53 | #include <linux/root_dev.h> |
54 | #include <linux/delay.h> | 54 | #include <linux/delay.h> |
55 | #include <linux/nfs_fs.h> | 55 | #include <linux/nfs_fs.h> |
56 | #include <linux/slab.h> | ||
56 | #include <net/net_namespace.h> | 57 | #include <net/net_namespace.h> |
57 | #include <net/arp.h> | 58 | #include <net/arp.h> |
58 | #include <net/ip.h> | 59 | #include <net/ip.h> |
@@ -187,6 +188,16 @@ struct ic_device { | |||
187 | static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ | 188 | static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ |
188 | static struct net_device *ic_dev __initdata = NULL; /* Selected device */ | 189 | static struct net_device *ic_dev __initdata = NULL; /* Selected device */ |
189 | 190 | ||
191 | static bool __init ic_device_match(struct net_device *dev) | ||
192 | { | ||
193 | if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : | ||
194 | (!(dev->flags & IFF_LOOPBACK) && | ||
195 | (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && | ||
196 | strncmp(dev->name, "dummy", 5))) | ||
197 | return true; | ||
198 | return false; | ||
199 | } | ||
200 | |||
190 | static int __init ic_open_devs(void) | 201 | static int __init ic_open_devs(void) |
191 | { | 202 | { |
192 | struct ic_device *d, **last; | 203 | struct ic_device *d, **last; |
@@ -207,10 +218,7 @@ static int __init ic_open_devs(void) | |||
207 | for_each_netdev(&init_net, dev) { | 218 | for_each_netdev(&init_net, dev) { |
208 | if (dev->flags & IFF_LOOPBACK) | 219 | if (dev->flags & IFF_LOOPBACK) |
209 | continue; | 220 | continue; |
210 | if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : | 221 | if (ic_device_match(dev)) { |
211 | (!(dev->flags & IFF_LOOPBACK) && | ||
212 | (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && | ||
213 | strncmp(dev->name, "dummy", 5))) { | ||
214 | int able = 0; | 222 | int able = 0; |
215 | if (dev->mtu >= 364) | 223 | if (dev->mtu >= 364) |
216 | able |= IC_BOOTP; | 224 | able |= IC_BOOTP; |
@@ -228,7 +236,7 @@ static int __init ic_open_devs(void) | |||
228 | } | 236 | } |
229 | if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { | 237 | if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { |
230 | rtnl_unlock(); | 238 | rtnl_unlock(); |
231 | return -1; | 239 | return -ENOMEM; |
232 | } | 240 | } |
233 | d->dev = dev; | 241 | d->dev = dev; |
234 | *last = d; | 242 | *last = d; |
@@ -253,7 +261,7 @@ static int __init ic_open_devs(void) | |||
253 | printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); | 261 | printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); |
254 | else | 262 | else |
255 | printk(KERN_ERR "IP-Config: No network devices available.\n"); | 263 | printk(KERN_ERR "IP-Config: No network devices available.\n"); |
256 | return -1; | 264 | return -ENODEV; |
257 | } | 265 | } |
258 | return 0; | 266 | return 0; |
259 | } | 267 | } |
@@ -1172,10 +1180,9 @@ static int __init ic_dynamic(void) | |||
1172 | schedule_timeout_uninterruptible(1); | 1180 | schedule_timeout_uninterruptible(1); |
1173 | #ifdef IPCONFIG_DHCP | 1181 | #ifdef IPCONFIG_DHCP |
1174 | /* DHCP isn't done until we get a DHCPACK. */ | 1182 | /* DHCP isn't done until we get a DHCPACK. */ |
1175 | if ((ic_got_reply & IC_BOOTP) | 1183 | if ((ic_got_reply & IC_BOOTP) && |
1176 | && (ic_proto_enabled & IC_USE_DHCP) | 1184 | (ic_proto_enabled & IC_USE_DHCP) && |
1177 | && ic_dhcp_msgtype != DHCPACK) | 1185 | ic_dhcp_msgtype != DHCPACK) { |
1178 | { | ||
1179 | ic_got_reply = 0; | 1186 | ic_got_reply = 0; |
1180 | printk(","); | 1187 | printk(","); |
1181 | continue; | 1188 | continue; |
@@ -1304,6 +1311,32 @@ __be32 __init root_nfs_parse_addr(char *name) | |||
1304 | return addr; | 1311 | return addr; |
1305 | } | 1312 | } |
1306 | 1313 | ||
1314 | #define DEVICE_WAIT_MAX 12 /* 12 seconds */ | ||
1315 | |||
1316 | static int __init wait_for_devices(void) | ||
1317 | { | ||
1318 | int i; | ||
1319 | |||
1320 | msleep(CONF_PRE_OPEN); | ||
1321 | for (i = 0; i < DEVICE_WAIT_MAX; i++) { | ||
1322 | struct net_device *dev; | ||
1323 | int found = 0; | ||
1324 | |||
1325 | rtnl_lock(); | ||
1326 | for_each_netdev(&init_net, dev) { | ||
1327 | if (ic_device_match(dev)) { | ||
1328 | found = 1; | ||
1329 | break; | ||
1330 | } | ||
1331 | } | ||
1332 | rtnl_unlock(); | ||
1333 | if (found) | ||
1334 | return 0; | ||
1335 | ssleep(1); | ||
1336 | } | ||
1337 | return -ENODEV; | ||
1338 | } | ||
1339 | |||
1307 | /* | 1340 | /* |
1308 | * IP Autoconfig dispatcher. | 1341 | * IP Autoconfig dispatcher. |
1309 | */ | 1342 | */ |
@@ -1314,6 +1347,7 @@ static int __init ip_auto_config(void) | |||
1314 | #ifdef IPCONFIG_DYNAMIC | 1347 | #ifdef IPCONFIG_DYNAMIC |
1315 | int retries = CONF_OPEN_RETRIES; | 1348 | int retries = CONF_OPEN_RETRIES; |
1316 | #endif | 1349 | #endif |
1350 | int err; | ||
1317 | 1351 | ||
1318 | #ifdef CONFIG_PROC_FS | 1352 | #ifdef CONFIG_PROC_FS |
1319 | proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); | 1353 | proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); |
@@ -1326,12 +1360,15 @@ static int __init ip_auto_config(void) | |||
1326 | #ifdef IPCONFIG_DYNAMIC | 1360 | #ifdef IPCONFIG_DYNAMIC |
1327 | try_try_again: | 1361 | try_try_again: |
1328 | #endif | 1362 | #endif |
1329 | /* Give hardware a chance to settle */ | 1363 | /* Wait for devices to appear */ |
1330 | msleep(CONF_PRE_OPEN); | 1364 | err = wait_for_devices(); |
1365 | if (err) | ||
1366 | return err; | ||
1331 | 1367 | ||
1332 | /* Setup all network devices */ | 1368 | /* Setup all network devices */ |
1333 | if (ic_open_devs() < 0) | 1369 | err = ic_open_devs(); |
1334 | return -1; | 1370 | if (err) |
1371 | return err; | ||
1335 | 1372 | ||
1336 | /* Give drivers a chance to settle */ | 1373 | /* Give drivers a chance to settle */ |
1337 | ssleep(CONF_POST_OPEN); | 1374 | ssleep(CONF_POST_OPEN); |
@@ -1344,9 +1381,9 @@ static int __init ip_auto_config(void) | |||
1344 | */ | 1381 | */ |
1345 | if (ic_myaddr == NONE || | 1382 | if (ic_myaddr == NONE || |
1346 | #ifdef CONFIG_ROOT_NFS | 1383 | #ifdef CONFIG_ROOT_NFS |
1347 | (root_server_addr == NONE | 1384 | (root_server_addr == NONE && |
1348 | && ic_servaddr == NONE | 1385 | ic_servaddr == NONE && |
1349 | && ROOT_DEV == Root_NFS) || | 1386 | ROOT_DEV == Root_NFS) || |
1350 | #endif | 1387 | #endif |
1351 | ic_first_dev->next) { | 1388 | ic_first_dev->next) { |
1352 | #ifdef IPCONFIG_DYNAMIC | 1389 | #ifdef IPCONFIG_DYNAMIC |
@@ -1447,7 +1484,7 @@ late_initcall(ip_auto_config); | |||
1447 | 1484 | ||
1448 | /* | 1485 | /* |
1449 | * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel | 1486 | * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel |
1450 | * command line parameter. See Documentation/filesystems/nfsroot.txt. | 1487 | * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt. |
1451 | */ | 1488 | */ |
1452 | static int __init ic_proto_name(char *name) | 1489 | static int __init ic_proto_name(char *name) |
1453 | { | 1490 | { |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ae40ed1ba560..0b27b14dcc9d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -95,6 +95,7 @@ | |||
95 | #include <linux/module.h> | 95 | #include <linux/module.h> |
96 | #include <linux/types.h> | 96 | #include <linux/types.h> |
97 | #include <linux/kernel.h> | 97 | #include <linux/kernel.h> |
98 | #include <linux/slab.h> | ||
98 | #include <asm/uaccess.h> | 99 | #include <asm/uaccess.h> |
99 | #include <linux/skbuff.h> | 100 | #include <linux/skbuff.h> |
100 | #include <linux/netdevice.h> | 101 | #include <linux/netdevice.h> |
@@ -119,7 +120,7 @@ | |||
119 | #define HASH_SIZE 16 | 120 | #define HASH_SIZE 16 |
120 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
121 | 122 | ||
122 | static int ipip_net_id; | 123 | static int ipip_net_id __read_mostly; |
123 | struct ipip_net { | 124 | struct ipip_net { |
124 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | 125 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; |
125 | struct ip_tunnel *tunnels_r[HASH_SIZE]; | 126 | struct ip_tunnel *tunnels_r[HASH_SIZE]; |
@@ -130,11 +131,16 @@ struct ipip_net { | |||
130 | struct net_device *fb_tunnel_dev; | 131 | struct net_device *fb_tunnel_dev; |
131 | }; | 132 | }; |
132 | 133 | ||
133 | static void ipip_fb_tunnel_init(struct net_device *dev); | ||
134 | static void ipip_tunnel_init(struct net_device *dev); | 134 | static void ipip_tunnel_init(struct net_device *dev); |
135 | static void ipip_tunnel_setup(struct net_device *dev); | 135 | static void ipip_tunnel_setup(struct net_device *dev); |
136 | 136 | ||
137 | static DEFINE_RWLOCK(ipip_lock); | 137 | /* |
138 | * Locking : hash tables are protected by RCU and a spinlock | ||
139 | */ | ||
140 | static DEFINE_SPINLOCK(ipip_lock); | ||
141 | |||
142 | #define for_each_ip_tunnel_rcu(start) \ | ||
143 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | ||
138 | 144 | ||
139 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, | 145 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, |
140 | __be32 remote, __be32 local) | 146 | __be32 remote, __be32 local) |
@@ -144,20 +150,21 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, | |||
144 | struct ip_tunnel *t; | 150 | struct ip_tunnel *t; |
145 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 151 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
146 | 152 | ||
147 | for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { | 153 | for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) |
148 | if (local == t->parms.iph.saddr && | 154 | if (local == t->parms.iph.saddr && |
149 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 155 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
150 | return t; | 156 | return t; |
151 | } | 157 | |
152 | for (t = ipn->tunnels_r[h0]; t; t = t->next) { | 158 | for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) |
153 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 159 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
154 | return t; | 160 | return t; |
155 | } | 161 | |
156 | for (t = ipn->tunnels_l[h1]; t; t = t->next) { | 162 | for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) |
157 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | 163 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) |
158 | return t; | 164 | return t; |
159 | } | 165 | |
160 | if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) | 166 | t = rcu_dereference(ipn->tunnels_wc[0]); |
167 | if (t && (t->dev->flags&IFF_UP)) | ||
161 | return t; | 168 | return t; |
162 | return NULL; | 169 | return NULL; |
163 | } | 170 | } |
@@ -193,9 +200,9 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) | |||
193 | 200 | ||
194 | for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { | 201 | for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { |
195 | if (t == *tp) { | 202 | if (t == *tp) { |
196 | write_lock_bh(&ipip_lock); | 203 | spin_lock_bh(&ipip_lock); |
197 | *tp = t->next; | 204 | *tp = t->next; |
198 | write_unlock_bh(&ipip_lock); | 205 | spin_unlock_bh(&ipip_lock); |
199 | break; | 206 | break; |
200 | } | 207 | } |
201 | } | 208 | } |
@@ -205,10 +212,10 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | |||
205 | { | 212 | { |
206 | struct ip_tunnel **tp = ipip_bucket(ipn, t); | 213 | struct ip_tunnel **tp = ipip_bucket(ipn, t); |
207 | 214 | ||
215 | spin_lock_bh(&ipip_lock); | ||
208 | t->next = *tp; | 216 | t->next = *tp; |
209 | write_lock_bh(&ipip_lock); | 217 | rcu_assign_pointer(*tp, t); |
210 | *tp = t; | 218 | spin_unlock_bh(&ipip_lock); |
211 | write_unlock_bh(&ipip_lock); | ||
212 | } | 219 | } |
213 | 220 | ||
214 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | 221 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, |
@@ -267,9 +274,9 @@ static void ipip_tunnel_uninit(struct net_device *dev) | |||
267 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 274 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
268 | 275 | ||
269 | if (dev == ipn->fb_tunnel_dev) { | 276 | if (dev == ipn->fb_tunnel_dev) { |
270 | write_lock_bh(&ipip_lock); | 277 | spin_lock_bh(&ipip_lock); |
271 | ipn->tunnels_wc[0] = NULL; | 278 | ipn->tunnels_wc[0] = NULL; |
272 | write_unlock_bh(&ipip_lock); | 279 | spin_unlock_bh(&ipip_lock); |
273 | } else | 280 | } else |
274 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); | 281 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); |
275 | dev_put(dev); | 282 | dev_put(dev); |
@@ -318,7 +325,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
318 | 325 | ||
319 | err = -ENOENT; | 326 | err = -ENOENT; |
320 | 327 | ||
321 | read_lock(&ipip_lock); | 328 | rcu_read_lock(); |
322 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 329 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); |
323 | if (t == NULL || t->parms.iph.daddr == 0) | 330 | if (t == NULL || t->parms.iph.daddr == 0) |
324 | goto out; | 331 | goto out; |
@@ -333,7 +340,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
333 | t->err_count = 1; | 340 | t->err_count = 1; |
334 | t->err_time = jiffies; | 341 | t->err_time = jiffies; |
335 | out: | 342 | out: |
336 | read_unlock(&ipip_lock); | 343 | rcu_read_unlock(); |
337 | return err; | 344 | return err; |
338 | } | 345 | } |
339 | 346 | ||
@@ -351,11 +358,11 @@ static int ipip_rcv(struct sk_buff *skb) | |||
351 | struct ip_tunnel *tunnel; | 358 | struct ip_tunnel *tunnel; |
352 | const struct iphdr *iph = ip_hdr(skb); | 359 | const struct iphdr *iph = ip_hdr(skb); |
353 | 360 | ||
354 | read_lock(&ipip_lock); | 361 | rcu_read_lock(); |
355 | if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), | 362 | if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), |
356 | iph->saddr, iph->daddr)) != NULL) { | 363 | iph->saddr, iph->daddr)) != NULL) { |
357 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 364 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
358 | read_unlock(&ipip_lock); | 365 | rcu_read_unlock(); |
359 | kfree_skb(skb); | 366 | kfree_skb(skb); |
360 | return 0; | 367 | return 0; |
361 | } | 368 | } |
@@ -374,10 +381,10 @@ static int ipip_rcv(struct sk_buff *skb) | |||
374 | nf_reset(skb); | 381 | nf_reset(skb); |
375 | ipip_ecn_decapsulate(iph, skb); | 382 | ipip_ecn_decapsulate(iph, skb); |
376 | netif_rx(skb); | 383 | netif_rx(skb); |
377 | read_unlock(&ipip_lock); | 384 | rcu_read_unlock(); |
378 | return 0; | 385 | return 0; |
379 | } | 386 | } |
380 | read_unlock(&ipip_lock); | 387 | rcu_read_unlock(); |
381 | 388 | ||
382 | return -1; | 389 | return -1; |
383 | } | 390 | } |
@@ -390,7 +397,8 @@ static int ipip_rcv(struct sk_buff *skb) | |||
390 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 397 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
391 | { | 398 | { |
392 | struct ip_tunnel *tunnel = netdev_priv(dev); | 399 | struct ip_tunnel *tunnel = netdev_priv(dev); |
393 | struct net_device_stats *stats = &tunnel->dev->stats; | 400 | struct net_device_stats *stats = &dev->stats; |
401 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); | ||
394 | struct iphdr *tiph = &tunnel->parms.iph; | 402 | struct iphdr *tiph = &tunnel->parms.iph; |
395 | u8 tos = tunnel->parms.iph.tos; | 403 | u8 tos = tunnel->parms.iph.tos; |
396 | __be16 df = tiph->frag_off; | 404 | __be16 df = tiph->frag_off; |
@@ -480,7 +488,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
480 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 488 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
481 | if (!new_skb) { | 489 | if (!new_skb) { |
482 | ip_rt_put(rt); | 490 | ip_rt_put(rt); |
483 | stats->tx_dropped++; | 491 | txq->tx_dropped++; |
484 | dev_kfree_skb(skb); | 492 | dev_kfree_skb(skb); |
485 | return NETDEV_TX_OK; | 493 | return NETDEV_TX_OK; |
486 | } | 494 | } |
@@ -722,7 +730,7 @@ static void ipip_tunnel_init(struct net_device *dev) | |||
722 | ipip_tunnel_bind_dev(dev); | 730 | ipip_tunnel_bind_dev(dev); |
723 | } | 731 | } |
724 | 732 | ||
725 | static void ipip_fb_tunnel_init(struct net_device *dev) | 733 | static void __net_init ipip_fb_tunnel_init(struct net_device *dev) |
726 | { | 734 | { |
727 | struct ip_tunnel *tunnel = netdev_priv(dev); | 735 | struct ip_tunnel *tunnel = netdev_priv(dev); |
728 | struct iphdr *iph = &tunnel->parms.iph; | 736 | struct iphdr *iph = &tunnel->parms.iph; |
@@ -748,33 +756,27 @@ static struct xfrm_tunnel ipip_handler = { | |||
748 | static const char banner[] __initconst = | 756 | static const char banner[] __initconst = |
749 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | 757 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; |
750 | 758 | ||
751 | static void ipip_destroy_tunnels(struct ipip_net *ipn) | 759 | static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) |
752 | { | 760 | { |
753 | int prio; | 761 | int prio; |
754 | 762 | ||
755 | for (prio = 1; prio < 4; prio++) { | 763 | for (prio = 1; prio < 4; prio++) { |
756 | int h; | 764 | int h; |
757 | for (h = 0; h < HASH_SIZE; h++) { | 765 | for (h = 0; h < HASH_SIZE; h++) { |
758 | struct ip_tunnel *t; | 766 | struct ip_tunnel *t = ipn->tunnels[prio][h]; |
759 | while ((t = ipn->tunnels[prio][h]) != NULL) | 767 | |
760 | unregister_netdevice(t->dev); | 768 | while (t != NULL) { |
769 | unregister_netdevice_queue(t->dev, head); | ||
770 | t = t->next; | ||
771 | } | ||
761 | } | 772 | } |
762 | } | 773 | } |
763 | } | 774 | } |
764 | 775 | ||
765 | static int ipip_init_net(struct net *net) | 776 | static int __net_init ipip_init_net(struct net *net) |
766 | { | 777 | { |
778 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
767 | int err; | 779 | int err; |
768 | struct ipip_net *ipn; | ||
769 | |||
770 | err = -ENOMEM; | ||
771 | ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL); | ||
772 | if (ipn == NULL) | ||
773 | goto err_alloc; | ||
774 | |||
775 | err = net_assign_generic(net, ipip_net_id, ipn); | ||
776 | if (err < 0) | ||
777 | goto err_assign; | ||
778 | 780 | ||
779 | ipn->tunnels[0] = ipn->tunnels_wc; | 781 | ipn->tunnels[0] = ipn->tunnels_wc; |
780 | ipn->tunnels[1] = ipn->tunnels_l; | 782 | ipn->tunnels[1] = ipn->tunnels_l; |
@@ -801,27 +803,26 @@ err_reg_dev: | |||
801 | free_netdev(ipn->fb_tunnel_dev); | 803 | free_netdev(ipn->fb_tunnel_dev); |
802 | err_alloc_dev: | 804 | err_alloc_dev: |
803 | /* nothing */ | 805 | /* nothing */ |
804 | err_assign: | ||
805 | kfree(ipn); | ||
806 | err_alloc: | ||
807 | return err; | 806 | return err; |
808 | } | 807 | } |
809 | 808 | ||
810 | static void ipip_exit_net(struct net *net) | 809 | static void __net_exit ipip_exit_net(struct net *net) |
811 | { | 810 | { |
812 | struct ipip_net *ipn; | 811 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
812 | LIST_HEAD(list); | ||
813 | 813 | ||
814 | ipn = net_generic(net, ipip_net_id); | ||
815 | rtnl_lock(); | 814 | rtnl_lock(); |
816 | ipip_destroy_tunnels(ipn); | 815 | ipip_destroy_tunnels(ipn, &list); |
817 | unregister_netdevice(ipn->fb_tunnel_dev); | 816 | unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); |
817 | unregister_netdevice_many(&list); | ||
818 | rtnl_unlock(); | 818 | rtnl_unlock(); |
819 | kfree(ipn); | ||
820 | } | 819 | } |
821 | 820 | ||
822 | static struct pernet_operations ipip_net_ops = { | 821 | static struct pernet_operations ipip_net_ops = { |
823 | .init = ipip_init_net, | 822 | .init = ipip_init_net, |
824 | .exit = ipip_exit_net, | 823 | .exit = ipip_exit_net, |
824 | .id = &ipip_net_id, | ||
825 | .size = sizeof(struct ipip_net), | ||
825 | }; | 826 | }; |
826 | 827 | ||
827 | static int __init ipip_init(void) | 828 | static int __init ipip_init(void) |
@@ -830,15 +831,14 @@ static int __init ipip_init(void) | |||
830 | 831 | ||
831 | printk(banner); | 832 | printk(banner); |
832 | 833 | ||
833 | if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { | 834 | err = register_pernet_device(&ipip_net_ops); |
835 | if (err < 0) | ||
836 | return err; | ||
837 | err = xfrm4_tunnel_register(&ipip_handler, AF_INET); | ||
838 | if (err < 0) { | ||
839 | unregister_pernet_device(&ipip_net_ops); | ||
834 | printk(KERN_INFO "ipip init: can't register tunnel\n"); | 840 | printk(KERN_INFO "ipip init: can't register tunnel\n"); |
835 | return -EAGAIN; | ||
836 | } | 841 | } |
837 | |||
838 | err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops); | ||
839 | if (err) | ||
840 | xfrm4_tunnel_deregister(&ipip_handler, AF_INET); | ||
841 | |||
842 | return err; | 842 | return err; |
843 | } | 843 | } |
844 | 844 | ||
@@ -847,7 +847,7 @@ static void __exit ipip_fini(void) | |||
847 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) | 847 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) |
848 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); | 848 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); |
849 | 849 | ||
850 | unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops); | 850 | unregister_pernet_device(&ipip_net_ops); |
851 | } | 851 | } |
852 | 852 | ||
853 | module_init(ipip_init); | 853 | module_init(ipip_init); |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 99508d66a642..ec19a890c9a0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/mroute.h> | 47 | #include <linux/mroute.h> |
48 | #include <linux/init.h> | 48 | #include <linux/init.h> |
49 | #include <linux/if_ether.h> | 49 | #include <linux/if_ether.h> |
50 | #include <linux/slab.h> | ||
50 | #include <net/net_namespace.h> | 51 | #include <net/net_namespace.h> |
51 | #include <net/ip.h> | 52 | #include <net/ip.h> |
52 | #include <net/protocol.h> | 53 | #include <net/protocol.h> |
@@ -275,7 +276,8 @@ failure: | |||
275 | * @notify: Set to 1, if the caller is a notifier_call | 276 | * @notify: Set to 1, if the caller is a notifier_call |
276 | */ | 277 | */ |
277 | 278 | ||
278 | static int vif_delete(struct net *net, int vifi, int notify) | 279 | static int vif_delete(struct net *net, int vifi, int notify, |
280 | struct list_head *head) | ||
279 | { | 281 | { |
280 | struct vif_device *v; | 282 | struct vif_device *v; |
281 | struct net_device *dev; | 283 | struct net_device *dev; |
@@ -319,7 +321,7 @@ static int vif_delete(struct net *net, int vifi, int notify) | |||
319 | } | 321 | } |
320 | 322 | ||
321 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) | 323 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) |
322 | unregister_netdevice(dev); | 324 | unregister_netdevice_queue(dev, head); |
323 | 325 | ||
324 | dev_put(dev); | 326 | dev_put(dev); |
325 | return 0; | 327 | return 0; |
@@ -469,8 +471,18 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) | |||
469 | return err; | 471 | return err; |
470 | } | 472 | } |
471 | break; | 473 | break; |
474 | |||
475 | case VIFF_USE_IFINDEX: | ||
472 | case 0: | 476 | case 0: |
473 | dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); | 477 | if (vifc->vifc_flags == VIFF_USE_IFINDEX) { |
478 | dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); | ||
479 | if (dev && dev->ip_ptr == NULL) { | ||
480 | dev_put(dev); | ||
481 | return -EADDRNOTAVAIL; | ||
482 | } | ||
483 | } else | ||
484 | dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); | ||
485 | |||
474 | if (!dev) | 486 | if (!dev) |
475 | return -EADDRNOTAVAIL; | 487 | return -EADDRNOTAVAIL; |
476 | err = dev_set_allmulti(dev, 1); | 488 | err = dev_set_allmulti(dev, 1); |
@@ -742,7 +754,8 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) | |||
742 | c->next = mfc_unres_queue; | 754 | c->next = mfc_unres_queue; |
743 | mfc_unres_queue = c; | 755 | mfc_unres_queue = c; |
744 | 756 | ||
745 | mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); | 757 | if (atomic_read(&net->ipv4.cache_resolve_queue_len) == 1) |
758 | mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); | ||
746 | } | 759 | } |
747 | 760 | ||
748 | /* | 761 | /* |
@@ -791,6 +804,9 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) | |||
791 | int line; | 804 | int line; |
792 | struct mfc_cache *uc, *c, **cp; | 805 | struct mfc_cache *uc, *c, **cp; |
793 | 806 | ||
807 | if (mfc->mfcc_parent >= MAXVIFS) | ||
808 | return -ENFILE; | ||
809 | |||
794 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 810 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
795 | 811 | ||
796 | for (cp = &net->ipv4.mfc_cache_array[line]; | 812 | for (cp = &net->ipv4.mfc_cache_array[line]; |
@@ -862,14 +878,16 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) | |||
862 | static void mroute_clean_tables(struct net *net) | 878 | static void mroute_clean_tables(struct net *net) |
863 | { | 879 | { |
864 | int i; | 880 | int i; |
881 | LIST_HEAD(list); | ||
865 | 882 | ||
866 | /* | 883 | /* |
867 | * Shut down all active vif entries | 884 | * Shut down all active vif entries |
868 | */ | 885 | */ |
869 | for (i = 0; i < net->ipv4.maxvif; i++) { | 886 | for (i = 0; i < net->ipv4.maxvif; i++) { |
870 | if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) | 887 | if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) |
871 | vif_delete(net, i, 0); | 888 | vif_delete(net, i, 0, &list); |
872 | } | 889 | } |
890 | unregister_netdevice_many(&list); | ||
873 | 891 | ||
874 | /* | 892 | /* |
875 | * Wipe the cache | 893 | * Wipe the cache |
@@ -948,7 +966,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
948 | switch (optname) { | 966 | switch (optname) { |
949 | case MRT_INIT: | 967 | case MRT_INIT: |
950 | if (sk->sk_type != SOCK_RAW || | 968 | if (sk->sk_type != SOCK_RAW || |
951 | inet_sk(sk)->num != IPPROTO_IGMP) | 969 | inet_sk(sk)->inet_num != IPPROTO_IGMP) |
952 | return -EOPNOTSUPP; | 970 | return -EOPNOTSUPP; |
953 | if (optlen != sizeof(int)) | 971 | if (optlen != sizeof(int)) |
954 | return -ENOPROTOOPT; | 972 | return -ENOPROTOOPT; |
@@ -985,7 +1003,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
985 | if (optname == MRT_ADD_VIF) { | 1003 | if (optname == MRT_ADD_VIF) { |
986 | ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); | 1004 | ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); |
987 | } else { | 1005 | } else { |
988 | ret = vif_delete(net, vif.vifc_vifi, 0); | 1006 | ret = vif_delete(net, vif.vifc_vifi, 0, NULL); |
989 | } | 1007 | } |
990 | rtnl_unlock(); | 1008 | rtnl_unlock(); |
991 | return ret; | 1009 | return ret; |
@@ -1148,17 +1166,16 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
1148 | struct net *net = dev_net(dev); | 1166 | struct net *net = dev_net(dev); |
1149 | struct vif_device *v; | 1167 | struct vif_device *v; |
1150 | int ct; | 1168 | int ct; |
1151 | 1169 | LIST_HEAD(list); | |
1152 | if (!net_eq(dev_net(dev), net)) | ||
1153 | return NOTIFY_DONE; | ||
1154 | 1170 | ||
1155 | if (event != NETDEV_UNREGISTER) | 1171 | if (event != NETDEV_UNREGISTER) |
1156 | return NOTIFY_DONE; | 1172 | return NOTIFY_DONE; |
1157 | v = &net->ipv4.vif_table[0]; | 1173 | v = &net->ipv4.vif_table[0]; |
1158 | for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { | 1174 | for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { |
1159 | if (v->dev == dev) | 1175 | if (v->dev == dev) |
1160 | vif_delete(net, ct, 1); | 1176 | vif_delete(net, ct, 1, &list); |
1161 | } | 1177 | } |
1178 | unregister_netdevice_many(&list); | ||
1162 | return NOTIFY_DONE; | 1179 | return NOTIFY_DONE; |
1163 | } | 1180 | } |
1164 | 1181 | ||
@@ -1601,17 +1618,20 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | |||
1601 | int ct; | 1618 | int ct; |
1602 | struct rtnexthop *nhp; | 1619 | struct rtnexthop *nhp; |
1603 | struct net *net = mfc_net(c); | 1620 | struct net *net = mfc_net(c); |
1604 | struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev; | ||
1605 | u8 *b = skb_tail_pointer(skb); | 1621 | u8 *b = skb_tail_pointer(skb); |
1606 | struct rtattr *mp_head; | 1622 | struct rtattr *mp_head; |
1607 | 1623 | ||
1608 | if (dev) | 1624 | /* If cache is unresolved, don't try to parse IIF and OIF */ |
1609 | RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); | 1625 | if (c->mfc_parent > MAXVIFS) |
1626 | return -ENOENT; | ||
1627 | |||
1628 | if (VIF_EXISTS(net, c->mfc_parent)) | ||
1629 | RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex); | ||
1610 | 1630 | ||
1611 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); | 1631 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); |
1612 | 1632 | ||
1613 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 1633 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
1614 | if (c->mfc_un.res.ttls[ct] < 255) { | 1634 | if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { |
1615 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 1635 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) |
1616 | goto rtattr_failure; | 1636 | goto rtattr_failure; |
1617 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 1637 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 1725dc0ef688..82fb43c5c59e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/netfilter_ipv4.h> | 4 | #include <linux/netfilter_ipv4.h> |
5 | #include <linux/ip.h> | 5 | #include <linux/ip.h> |
6 | #include <linux/skbuff.h> | 6 | #include <linux/skbuff.h> |
7 | #include <linux/gfp.h> | ||
7 | #include <net/route.h> | 8 | #include <net/route.h> |
8 | #include <net/xfrm.h> | 9 | #include <net/xfrm.h> |
9 | #include <net/ip.h> | 10 | #include <net/ip.h> |
@@ -155,10 +156,10 @@ static int nf_ip_reroute(struct sk_buff *skb, | |||
155 | if (entry->hook == NF_INET_LOCAL_OUT) { | 156 | if (entry->hook == NF_INET_LOCAL_OUT) { |
156 | const struct iphdr *iph = ip_hdr(skb); | 157 | const struct iphdr *iph = ip_hdr(skb); |
157 | 158 | ||
158 | if (!(iph->tos == rt_info->tos | 159 | if (!(iph->tos == rt_info->tos && |
159 | && skb->mark == rt_info->mark | 160 | skb->mark == rt_info->mark && |
160 | && iph->daddr == rt_info->daddr | 161 | iph->daddr == rt_info->daddr && |
161 | && iph->saddr == rt_info->saddr)) | 162 | iph->saddr == rt_info->saddr)) |
162 | return ip_route_me_harder(skb, RTN_UNSPEC); | 163 | return ip_route_me_harder(skb, RTN_UNSPEC); |
163 | } | 164 | } |
164 | return 0; | 165 | return 0; |
@@ -248,9 +249,9 @@ module_exit(ipv4_netfilter_fini); | |||
248 | 249 | ||
249 | #ifdef CONFIG_SYSCTL | 250 | #ifdef CONFIG_SYSCTL |
250 | struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { | 251 | struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { |
251 | { .procname = "net", .ctl_name = CTL_NET, }, | 252 | { .procname = "net", }, |
252 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 253 | { .procname = "ipv4", }, |
253 | { .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, }, | 254 | { .procname = "netfilter", }, |
254 | { } | 255 | { } |
255 | }; | 256 | }; |
256 | EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); | 257 | EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 27774c99d888..f07d77f65751 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -27,6 +27,7 @@ | |||
27 | 27 | ||
28 | #include <linux/netfilter/x_tables.h> | 28 | #include <linux/netfilter/x_tables.h> |
29 | #include <linux/netfilter_arp/arp_tables.h> | 29 | #include <linux/netfilter_arp/arp_tables.h> |
30 | #include "../../netfilter/xt_repldata.h" | ||
30 | 31 | ||
31 | MODULE_LICENSE("GPL"); | 32 | MODULE_LICENSE("GPL"); |
32 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); | 33 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); |
@@ -58,6 +59,12 @@ do { \ | |||
58 | #define ARP_NF_ASSERT(x) | 59 | #define ARP_NF_ASSERT(x) |
59 | #endif | 60 | #endif |
60 | 61 | ||
62 | void *arpt_alloc_initial_table(const struct xt_table *info) | ||
63 | { | ||
64 | return xt_alloc_initial_table(arpt, ARPT); | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); | ||
67 | |||
61 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, | 68 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, |
62 | const char *hdr_addr, int len) | 69 | const char *hdr_addr, int len) |
63 | { | 70 | { |
@@ -226,7 +233,14 @@ arpt_error(struct sk_buff *skb, const struct xt_target_param *par) | |||
226 | return NF_DROP; | 233 | return NF_DROP; |
227 | } | 234 | } |
228 | 235 | ||
229 | static inline struct arpt_entry *get_entry(void *base, unsigned int offset) | 236 | static inline const struct arpt_entry_target * |
237 | arpt_get_target_c(const struct arpt_entry *e) | ||
238 | { | ||
239 | return arpt_get_target((struct arpt_entry *)e); | ||
240 | } | ||
241 | |||
242 | static inline struct arpt_entry * | ||
243 | get_entry(const void *base, unsigned int offset) | ||
230 | { | 244 | { |
231 | return (struct arpt_entry *)(base + offset); | 245 | return (struct arpt_entry *)(base + offset); |
232 | } | 246 | } |
@@ -273,7 +287,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
273 | 287 | ||
274 | arp = arp_hdr(skb); | 288 | arp = arp_hdr(skb); |
275 | do { | 289 | do { |
276 | struct arpt_entry_target *t; | 290 | const struct arpt_entry_target *t; |
277 | int hdr_len; | 291 | int hdr_len; |
278 | 292 | ||
279 | if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { | 293 | if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { |
@@ -285,7 +299,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
285 | (2 * skb->dev->addr_len); | 299 | (2 * skb->dev->addr_len); |
286 | ADD_COUNTER(e->counters, hdr_len, 1); | 300 | ADD_COUNTER(e->counters, hdr_len, 1); |
287 | 301 | ||
288 | t = arpt_get_target(e); | 302 | t = arpt_get_target_c(e); |
289 | 303 | ||
290 | /* Standard target? */ | 304 | /* Standard target? */ |
291 | if (!t->u.kernel.target->target) { | 305 | if (!t->u.kernel.target->target) { |
@@ -351,7 +365,7 @@ static inline bool unconditional(const struct arpt_arp *arp) | |||
351 | /* Figures out from what hook each rule can be called: returns 0 if | 365 | /* Figures out from what hook each rule can be called: returns 0 if |
352 | * there are loops. Puts hook bitmask in comefrom. | 366 | * there are loops. Puts hook bitmask in comefrom. |
353 | */ | 367 | */ |
354 | static int mark_source_chains(struct xt_table_info *newinfo, | 368 | static int mark_source_chains(const struct xt_table_info *newinfo, |
355 | unsigned int valid_hooks, void *entry0) | 369 | unsigned int valid_hooks, void *entry0) |
356 | { | 370 | { |
357 | unsigned int hook; | 371 | unsigned int hook; |
@@ -372,7 +386,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
372 | 386 | ||
373 | for (;;) { | 387 | for (;;) { |
374 | const struct arpt_standard_target *t | 388 | const struct arpt_standard_target *t |
375 | = (void *)arpt_get_target(e); | 389 | = (void *)arpt_get_target_c(e); |
376 | int visited = e->comefrom & (1 << hook); | 390 | int visited = e->comefrom & (1 << hook); |
377 | 391 | ||
378 | if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { | 392 | if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { |
@@ -384,11 +398,11 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
384 | |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); | 398 | |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); |
385 | 399 | ||
386 | /* Unconditional return/END. */ | 400 | /* Unconditional return/END. */ |
387 | if ((e->target_offset == sizeof(struct arpt_entry) | 401 | if ((e->target_offset == sizeof(struct arpt_entry) && |
388 | && (strcmp(t->target.u.user.name, | 402 | (strcmp(t->target.u.user.name, |
389 | ARPT_STANDARD_TARGET) == 0) | 403 | ARPT_STANDARD_TARGET) == 0) && |
390 | && t->verdict < 0 | 404 | t->verdict < 0 && unconditional(&e->arp)) || |
391 | && unconditional(&e->arp)) || visited) { | 405 | visited) { |
392 | unsigned int oldpos, size; | 406 | unsigned int oldpos, size; |
393 | 407 | ||
394 | if ((strcmp(t->target.u.user.name, | 408 | if ((strcmp(t->target.u.user.name, |
@@ -427,8 +441,8 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
427 | int newpos = t->verdict; | 441 | int newpos = t->verdict; |
428 | 442 | ||
429 | if (strcmp(t->target.u.user.name, | 443 | if (strcmp(t->target.u.user.name, |
430 | ARPT_STANDARD_TARGET) == 0 | 444 | ARPT_STANDARD_TARGET) == 0 && |
431 | && newpos >= 0) { | 445 | newpos >= 0) { |
432 | if (newpos > newinfo->size - | 446 | if (newpos > newinfo->size - |
433 | sizeof(struct arpt_entry)) { | 447 | sizeof(struct arpt_entry)) { |
434 | duprintf("mark_source_chains: " | 448 | duprintf("mark_source_chains: " |
@@ -456,7 +470,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
456 | return 1; | 470 | return 1; |
457 | } | 471 | } |
458 | 472 | ||
459 | static inline int check_entry(struct arpt_entry *e, const char *name) | 473 | static inline int check_entry(const struct arpt_entry *e, const char *name) |
460 | { | 474 | { |
461 | const struct arpt_entry_target *t; | 475 | const struct arpt_entry_target *t; |
462 | 476 | ||
@@ -468,7 +482,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) | |||
468 | if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) | 482 | if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) |
469 | return -EINVAL; | 483 | return -EINVAL; |
470 | 484 | ||
471 | t = arpt_get_target(e); | 485 | t = arpt_get_target_c(e); |
472 | if (e->target_offset + t->u.target_size > e->next_offset) | 486 | if (e->target_offset + t->u.target_size > e->next_offset) |
473 | return -EINVAL; | 487 | return -EINVAL; |
474 | 488 | ||
@@ -498,8 +512,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) | |||
498 | } | 512 | } |
499 | 513 | ||
500 | static inline int | 514 | static inline int |
501 | find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | 515 | find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) |
502 | unsigned int *i) | ||
503 | { | 516 | { |
504 | struct arpt_entry_target *t; | 517 | struct arpt_entry_target *t; |
505 | struct xt_target *target; | 518 | struct xt_target *target; |
@@ -524,8 +537,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | |||
524 | ret = check_target(e, name); | 537 | ret = check_target(e, name); |
525 | if (ret) | 538 | if (ret) |
526 | goto err; | 539 | goto err; |
527 | |||
528 | (*i)++; | ||
529 | return 0; | 540 | return 0; |
530 | err: | 541 | err: |
531 | module_put(t->u.kernel.target->me); | 542 | module_put(t->u.kernel.target->me); |
@@ -533,14 +544,14 @@ out: | |||
533 | return ret; | 544 | return ret; |
534 | } | 545 | } |
535 | 546 | ||
536 | static bool check_underflow(struct arpt_entry *e) | 547 | static bool check_underflow(const struct arpt_entry *e) |
537 | { | 548 | { |
538 | const struct arpt_entry_target *t; | 549 | const struct arpt_entry_target *t; |
539 | unsigned int verdict; | 550 | unsigned int verdict; |
540 | 551 | ||
541 | if (!unconditional(&e->arp)) | 552 | if (!unconditional(&e->arp)) |
542 | return false; | 553 | return false; |
543 | t = arpt_get_target(e); | 554 | t = arpt_get_target_c(e); |
544 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) | 555 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) |
545 | return false; | 556 | return false; |
546 | verdict = ((struct arpt_standard_target *)t)->verdict; | 557 | verdict = ((struct arpt_standard_target *)t)->verdict; |
@@ -550,17 +561,16 @@ static bool check_underflow(struct arpt_entry *e) | |||
550 | 561 | ||
551 | static inline int check_entry_size_and_hooks(struct arpt_entry *e, | 562 | static inline int check_entry_size_and_hooks(struct arpt_entry *e, |
552 | struct xt_table_info *newinfo, | 563 | struct xt_table_info *newinfo, |
553 | unsigned char *base, | 564 | const unsigned char *base, |
554 | unsigned char *limit, | 565 | const unsigned char *limit, |
555 | const unsigned int *hook_entries, | 566 | const unsigned int *hook_entries, |
556 | const unsigned int *underflows, | 567 | const unsigned int *underflows, |
557 | unsigned int valid_hooks, | 568 | unsigned int valid_hooks) |
558 | unsigned int *i) | ||
559 | { | 569 | { |
560 | unsigned int h; | 570 | unsigned int h; |
561 | 571 | ||
562 | if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 | 572 | if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || |
563 | || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { | 573 | (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { |
564 | duprintf("Bad offset %p\n", e); | 574 | duprintf("Bad offset %p\n", e); |
565 | return -EINVAL; | 575 | return -EINVAL; |
566 | } | 576 | } |
@@ -592,19 +602,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, | |||
592 | /* Clear counters and comefrom */ | 602 | /* Clear counters and comefrom */ |
593 | e->counters = ((struct xt_counters) { 0, 0 }); | 603 | e->counters = ((struct xt_counters) { 0, 0 }); |
594 | e->comefrom = 0; | 604 | e->comefrom = 0; |
595 | |||
596 | (*i)++; | ||
597 | return 0; | 605 | return 0; |
598 | } | 606 | } |
599 | 607 | ||
600 | static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | 608 | static inline void cleanup_entry(struct arpt_entry *e) |
601 | { | 609 | { |
602 | struct xt_tgdtor_param par; | 610 | struct xt_tgdtor_param par; |
603 | struct arpt_entry_target *t; | 611 | struct arpt_entry_target *t; |
604 | 612 | ||
605 | if (i && (*i)-- == 0) | ||
606 | return 1; | ||
607 | |||
608 | t = arpt_get_target(e); | 613 | t = arpt_get_target(e); |
609 | par.target = t->u.kernel.target; | 614 | par.target = t->u.kernel.target; |
610 | par.targinfo = t->data; | 615 | par.targinfo = t->data; |
@@ -612,26 +617,20 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | |||
612 | if (par.target->destroy != NULL) | 617 | if (par.target->destroy != NULL) |
613 | par.target->destroy(&par); | 618 | par.target->destroy(&par); |
614 | module_put(par.target->me); | 619 | module_put(par.target->me); |
615 | return 0; | ||
616 | } | 620 | } |
617 | 621 | ||
618 | /* Checks and translates the user-supplied table segment (held in | 622 | /* Checks and translates the user-supplied table segment (held in |
619 | * newinfo). | 623 | * newinfo). |
620 | */ | 624 | */ |
621 | static int translate_table(const char *name, | 625 | static int translate_table(struct xt_table_info *newinfo, void *entry0, |
622 | unsigned int valid_hooks, | 626 | const struct arpt_replace *repl) |
623 | struct xt_table_info *newinfo, | ||
624 | void *entry0, | ||
625 | unsigned int size, | ||
626 | unsigned int number, | ||
627 | const unsigned int *hook_entries, | ||
628 | const unsigned int *underflows) | ||
629 | { | 627 | { |
628 | struct arpt_entry *iter; | ||
630 | unsigned int i; | 629 | unsigned int i; |
631 | int ret; | 630 | int ret = 0; |
632 | 631 | ||
633 | newinfo->size = size; | 632 | newinfo->size = repl->size; |
634 | newinfo->number = number; | 633 | newinfo->number = repl->num_entries; |
635 | 634 | ||
636 | /* Init all hooks to impossible value. */ | 635 | /* Init all hooks to impossible value. */ |
637 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { | 636 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { |
@@ -643,52 +642,63 @@ static int translate_table(const char *name, | |||
643 | i = 0; | 642 | i = 0; |
644 | 643 | ||
645 | /* Walk through entries, checking offsets. */ | 644 | /* Walk through entries, checking offsets. */ |
646 | ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, | 645 | xt_entry_foreach(iter, entry0, newinfo->size) { |
647 | check_entry_size_and_hooks, | 646 | ret = check_entry_size_and_hooks(iter, newinfo, entry0, |
648 | newinfo, | 647 | entry0 + repl->size, |
649 | entry0, | 648 | repl->hook_entry, |
650 | entry0 + size, | 649 | repl->underflow, |
651 | hook_entries, underflows, valid_hooks, &i); | 650 | repl->valid_hooks); |
651 | if (ret != 0) | ||
652 | break; | ||
653 | ++i; | ||
654 | } | ||
652 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); | 655 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); |
653 | if (ret != 0) | 656 | if (ret != 0) |
654 | return ret; | 657 | return ret; |
655 | 658 | ||
656 | if (i != number) { | 659 | if (i != repl->num_entries) { |
657 | duprintf("translate_table: %u not %u entries\n", | 660 | duprintf("translate_table: %u not %u entries\n", |
658 | i, number); | 661 | i, repl->num_entries); |
659 | return -EINVAL; | 662 | return -EINVAL; |
660 | } | 663 | } |
661 | 664 | ||
662 | /* Check hooks all assigned */ | 665 | /* Check hooks all assigned */ |
663 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { | 666 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { |
664 | /* Only hooks which are valid */ | 667 | /* Only hooks which are valid */ |
665 | if (!(valid_hooks & (1 << i))) | 668 | if (!(repl->valid_hooks & (1 << i))) |
666 | continue; | 669 | continue; |
667 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { | 670 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { |
668 | duprintf("Invalid hook entry %u %u\n", | 671 | duprintf("Invalid hook entry %u %u\n", |
669 | i, hook_entries[i]); | 672 | i, repl->hook_entry[i]); |
670 | return -EINVAL; | 673 | return -EINVAL; |
671 | } | 674 | } |
672 | if (newinfo->underflow[i] == 0xFFFFFFFF) { | 675 | if (newinfo->underflow[i] == 0xFFFFFFFF) { |
673 | duprintf("Invalid underflow %u %u\n", | 676 | duprintf("Invalid underflow %u %u\n", |
674 | i, underflows[i]); | 677 | i, repl->underflow[i]); |
675 | return -EINVAL; | 678 | return -EINVAL; |
676 | } | 679 | } |
677 | } | 680 | } |
678 | 681 | ||
679 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) { | 682 | if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { |
680 | duprintf("Looping hook\n"); | 683 | duprintf("Looping hook\n"); |
681 | return -ELOOP; | 684 | return -ELOOP; |
682 | } | 685 | } |
683 | 686 | ||
684 | /* Finally, each sanity check must pass */ | 687 | /* Finally, each sanity check must pass */ |
685 | i = 0; | 688 | i = 0; |
686 | ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, | 689 | xt_entry_foreach(iter, entry0, newinfo->size) { |
687 | find_check_entry, name, size, &i); | 690 | ret = find_check_entry(iter, repl->name, repl->size); |
691 | if (ret != 0) | ||
692 | break; | ||
693 | ++i; | ||
694 | } | ||
688 | 695 | ||
689 | if (ret != 0) { | 696 | if (ret != 0) { |
690 | ARPT_ENTRY_ITERATE(entry0, newinfo->size, | 697 | xt_entry_foreach(iter, entry0, newinfo->size) { |
691 | cleanup_entry, &i); | 698 | if (i-- == 0) |
699 | break; | ||
700 | cleanup_entry(iter); | ||
701 | } | ||
692 | return ret; | 702 | return ret; |
693 | } | 703 | } |
694 | 704 | ||
@@ -701,30 +711,10 @@ static int translate_table(const char *name, | |||
701 | return ret; | 711 | return ret; |
702 | } | 712 | } |
703 | 713 | ||
704 | /* Gets counters. */ | ||
705 | static inline int add_entry_to_counter(const struct arpt_entry *e, | ||
706 | struct xt_counters total[], | ||
707 | unsigned int *i) | ||
708 | { | ||
709 | ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
710 | |||
711 | (*i)++; | ||
712 | return 0; | ||
713 | } | ||
714 | |||
715 | static inline int set_entry_to_counter(const struct arpt_entry *e, | ||
716 | struct xt_counters total[], | ||
717 | unsigned int *i) | ||
718 | { | ||
719 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
720 | |||
721 | (*i)++; | ||
722 | return 0; | ||
723 | } | ||
724 | |||
725 | static void get_counters(const struct xt_table_info *t, | 714 | static void get_counters(const struct xt_table_info *t, |
726 | struct xt_counters counters[]) | 715 | struct xt_counters counters[]) |
727 | { | 716 | { |
717 | struct arpt_entry *iter; | ||
728 | unsigned int cpu; | 718 | unsigned int cpu; |
729 | unsigned int i; | 719 | unsigned int i; |
730 | unsigned int curcpu; | 720 | unsigned int curcpu; |
@@ -740,32 +730,32 @@ static void get_counters(const struct xt_table_info *t, | |||
740 | curcpu = smp_processor_id(); | 730 | curcpu = smp_processor_id(); |
741 | 731 | ||
742 | i = 0; | 732 | i = 0; |
743 | ARPT_ENTRY_ITERATE(t->entries[curcpu], | 733 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { |
744 | t->size, | 734 | SET_COUNTER(counters[i], iter->counters.bcnt, |
745 | set_entry_to_counter, | 735 | iter->counters.pcnt); |
746 | counters, | 736 | ++i; |
747 | &i); | 737 | } |
748 | 738 | ||
749 | for_each_possible_cpu(cpu) { | 739 | for_each_possible_cpu(cpu) { |
750 | if (cpu == curcpu) | 740 | if (cpu == curcpu) |
751 | continue; | 741 | continue; |
752 | i = 0; | 742 | i = 0; |
753 | xt_info_wrlock(cpu); | 743 | xt_info_wrlock(cpu); |
754 | ARPT_ENTRY_ITERATE(t->entries[cpu], | 744 | xt_entry_foreach(iter, t->entries[cpu], t->size) { |
755 | t->size, | 745 | ADD_COUNTER(counters[i], iter->counters.bcnt, |
756 | add_entry_to_counter, | 746 | iter->counters.pcnt); |
757 | counters, | 747 | ++i; |
758 | &i); | 748 | } |
759 | xt_info_wrunlock(cpu); | 749 | xt_info_wrunlock(cpu); |
760 | } | 750 | } |
761 | local_bh_enable(); | 751 | local_bh_enable(); |
762 | } | 752 | } |
763 | 753 | ||
764 | static struct xt_counters *alloc_counters(struct xt_table *table) | 754 | static struct xt_counters *alloc_counters(const struct xt_table *table) |
765 | { | 755 | { |
766 | unsigned int countersize; | 756 | unsigned int countersize; |
767 | struct xt_counters *counters; | 757 | struct xt_counters *counters; |
768 | struct xt_table_info *private = table->private; | 758 | const struct xt_table_info *private = table->private; |
769 | 759 | ||
770 | /* We need atomic snapshot of counters: rest doesn't change | 760 | /* We need atomic snapshot of counters: rest doesn't change |
771 | * (other than comefrom, which userspace doesn't care | 761 | * (other than comefrom, which userspace doesn't care |
@@ -783,11 +773,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) | |||
783 | } | 773 | } |
784 | 774 | ||
785 | static int copy_entries_to_user(unsigned int total_size, | 775 | static int copy_entries_to_user(unsigned int total_size, |
786 | struct xt_table *table, | 776 | const struct xt_table *table, |
787 | void __user *userptr) | 777 | void __user *userptr) |
788 | { | 778 | { |
789 | unsigned int off, num; | 779 | unsigned int off, num; |
790 | struct arpt_entry *e; | 780 | const struct arpt_entry *e; |
791 | struct xt_counters *counters; | 781 | struct xt_counters *counters; |
792 | struct xt_table_info *private = table->private; | 782 | struct xt_table_info *private = table->private; |
793 | int ret = 0; | 783 | int ret = 0; |
@@ -807,7 +797,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
807 | /* FIXME: use iterator macros --RR */ | 797 | /* FIXME: use iterator macros --RR */ |
808 | /* ... then go back and fix counters and names */ | 798 | /* ... then go back and fix counters and names */ |
809 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ | 799 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ |
810 | struct arpt_entry_target *t; | 800 | const struct arpt_entry_target *t; |
811 | 801 | ||
812 | e = (struct arpt_entry *)(loc_cpu_entry + off); | 802 | e = (struct arpt_entry *)(loc_cpu_entry + off); |
813 | if (copy_to_user(userptr + off | 803 | if (copy_to_user(userptr + off |
@@ -818,7 +808,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
818 | goto free_counters; | 808 | goto free_counters; |
819 | } | 809 | } |
820 | 810 | ||
821 | t = arpt_get_target(e); | 811 | t = arpt_get_target_c(e); |
822 | if (copy_to_user(userptr + off + e->target_offset | 812 | if (copy_to_user(userptr + off + e->target_offset |
823 | + offsetof(struct arpt_entry_target, | 813 | + offsetof(struct arpt_entry_target, |
824 | u.user.name), | 814 | u.user.name), |
@@ -835,7 +825,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
835 | } | 825 | } |
836 | 826 | ||
837 | #ifdef CONFIG_COMPAT | 827 | #ifdef CONFIG_COMPAT |
838 | static void compat_standard_from_user(void *dst, void *src) | 828 | static void compat_standard_from_user(void *dst, const void *src) |
839 | { | 829 | { |
840 | int v = *(compat_int_t *)src; | 830 | int v = *(compat_int_t *)src; |
841 | 831 | ||
@@ -844,7 +834,7 @@ static void compat_standard_from_user(void *dst, void *src) | |||
844 | memcpy(dst, &v, sizeof(v)); | 834 | memcpy(dst, &v, sizeof(v)); |
845 | } | 835 | } |
846 | 836 | ||
847 | static int compat_standard_to_user(void __user *dst, void *src) | 837 | static int compat_standard_to_user(void __user *dst, const void *src) |
848 | { | 838 | { |
849 | compat_int_t cv = *(int *)src; | 839 | compat_int_t cv = *(int *)src; |
850 | 840 | ||
@@ -853,18 +843,18 @@ static int compat_standard_to_user(void __user *dst, void *src) | |||
853 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; | 843 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; |
854 | } | 844 | } |
855 | 845 | ||
856 | static int compat_calc_entry(struct arpt_entry *e, | 846 | static int compat_calc_entry(const struct arpt_entry *e, |
857 | const struct xt_table_info *info, | 847 | const struct xt_table_info *info, |
858 | void *base, struct xt_table_info *newinfo) | 848 | const void *base, struct xt_table_info *newinfo) |
859 | { | 849 | { |
860 | struct arpt_entry_target *t; | 850 | const struct arpt_entry_target *t; |
861 | unsigned int entry_offset; | 851 | unsigned int entry_offset; |
862 | int off, i, ret; | 852 | int off, i, ret; |
863 | 853 | ||
864 | off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); | 854 | off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); |
865 | entry_offset = (void *)e - base; | 855 | entry_offset = (void *)e - base; |
866 | 856 | ||
867 | t = arpt_get_target(e); | 857 | t = arpt_get_target_c(e); |
868 | off += xt_compat_target_offset(t->u.kernel.target); | 858 | off += xt_compat_target_offset(t->u.kernel.target); |
869 | newinfo->size -= off; | 859 | newinfo->size -= off; |
870 | ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); | 860 | ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); |
@@ -885,7 +875,9 @@ static int compat_calc_entry(struct arpt_entry *e, | |||
885 | static int compat_table_info(const struct xt_table_info *info, | 875 | static int compat_table_info(const struct xt_table_info *info, |
886 | struct xt_table_info *newinfo) | 876 | struct xt_table_info *newinfo) |
887 | { | 877 | { |
878 | struct arpt_entry *iter; | ||
888 | void *loc_cpu_entry; | 879 | void *loc_cpu_entry; |
880 | int ret; | ||
889 | 881 | ||
890 | if (!newinfo || !info) | 882 | if (!newinfo || !info) |
891 | return -EINVAL; | 883 | return -EINVAL; |
@@ -894,13 +886,17 @@ static int compat_table_info(const struct xt_table_info *info, | |||
894 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 886 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
895 | newinfo->initial_entries = 0; | 887 | newinfo->initial_entries = 0; |
896 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 888 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
897 | return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size, | 889 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
898 | compat_calc_entry, info, loc_cpu_entry, | 890 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
899 | newinfo); | 891 | if (ret != 0) |
892 | return ret; | ||
893 | } | ||
894 | return 0; | ||
900 | } | 895 | } |
901 | #endif | 896 | #endif |
902 | 897 | ||
903 | static int get_info(struct net *net, void __user *user, int *len, int compat) | 898 | static int get_info(struct net *net, void __user *user, |
899 | const int *len, int compat) | ||
904 | { | 900 | { |
905 | char name[ARPT_TABLE_MAXNAMELEN]; | 901 | char name[ARPT_TABLE_MAXNAMELEN]; |
906 | struct xt_table *t; | 902 | struct xt_table *t; |
@@ -925,10 +921,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
925 | if (t && !IS_ERR(t)) { | 921 | if (t && !IS_ERR(t)) { |
926 | struct arpt_getinfo info; | 922 | struct arpt_getinfo info; |
927 | const struct xt_table_info *private = t->private; | 923 | const struct xt_table_info *private = t->private; |
928 | |||
929 | #ifdef CONFIG_COMPAT | 924 | #ifdef CONFIG_COMPAT |
925 | struct xt_table_info tmp; | ||
926 | |||
930 | if (compat) { | 927 | if (compat) { |
931 | struct xt_table_info tmp; | ||
932 | ret = compat_table_info(private, &tmp); | 928 | ret = compat_table_info(private, &tmp); |
933 | xt_compat_flush_offsets(NFPROTO_ARP); | 929 | xt_compat_flush_offsets(NFPROTO_ARP); |
934 | private = &tmp; | 930 | private = &tmp; |
@@ -959,7 +955,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
959 | } | 955 | } |
960 | 956 | ||
961 | static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, | 957 | static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, |
962 | int *len) | 958 | const int *len) |
963 | { | 959 | { |
964 | int ret; | 960 | int ret; |
965 | struct arpt_get_entries get; | 961 | struct arpt_get_entries get; |
@@ -1010,6 +1006,7 @@ static int __do_replace(struct net *net, const char *name, | |||
1010 | struct xt_table_info *oldinfo; | 1006 | struct xt_table_info *oldinfo; |
1011 | struct xt_counters *counters; | 1007 | struct xt_counters *counters; |
1012 | void *loc_cpu_old_entry; | 1008 | void *loc_cpu_old_entry; |
1009 | struct arpt_entry *iter; | ||
1013 | 1010 | ||
1014 | ret = 0; | 1011 | ret = 0; |
1015 | counters = vmalloc_node(num_counters * sizeof(struct xt_counters), | 1012 | counters = vmalloc_node(num_counters * sizeof(struct xt_counters), |
@@ -1053,8 +1050,8 @@ static int __do_replace(struct net *net, const char *name, | |||
1053 | 1050 | ||
1054 | /* Decrease module usage counts and free resource */ | 1051 | /* Decrease module usage counts and free resource */ |
1055 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1052 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1056 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1053 | xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) |
1057 | NULL); | 1054 | cleanup_entry(iter); |
1058 | 1055 | ||
1059 | xt_free_table_info(oldinfo); | 1056 | xt_free_table_info(oldinfo); |
1060 | if (copy_to_user(counters_ptr, counters, | 1057 | if (copy_to_user(counters_ptr, counters, |
@@ -1073,12 +1070,14 @@ static int __do_replace(struct net *net, const char *name, | |||
1073 | return ret; | 1070 | return ret; |
1074 | } | 1071 | } |
1075 | 1072 | ||
1076 | static int do_replace(struct net *net, void __user *user, unsigned int len) | 1073 | static int do_replace(struct net *net, const void __user *user, |
1074 | unsigned int len) | ||
1077 | { | 1075 | { |
1078 | int ret; | 1076 | int ret; |
1079 | struct arpt_replace tmp; | 1077 | struct arpt_replace tmp; |
1080 | struct xt_table_info *newinfo; | 1078 | struct xt_table_info *newinfo; |
1081 | void *loc_cpu_entry; | 1079 | void *loc_cpu_entry; |
1080 | struct arpt_entry *iter; | ||
1082 | 1081 | ||
1083 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1082 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1084 | return -EFAULT; | 1083 | return -EFAULT; |
@@ -1099,9 +1098,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
1099 | goto free_newinfo; | 1098 | goto free_newinfo; |
1100 | } | 1099 | } |
1101 | 1100 | ||
1102 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1101 | ret = translate_table(newinfo, loc_cpu_entry, &tmp); |
1103 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, | ||
1104 | tmp.hook_entry, tmp.underflow); | ||
1105 | if (ret != 0) | 1102 | if (ret != 0) |
1106 | goto free_newinfo; | 1103 | goto free_newinfo; |
1107 | 1104 | ||
@@ -1114,27 +1111,15 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
1114 | return 0; | 1111 | return 0; |
1115 | 1112 | ||
1116 | free_newinfo_untrans: | 1113 | free_newinfo_untrans: |
1117 | ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1114 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
1115 | cleanup_entry(iter); | ||
1118 | free_newinfo: | 1116 | free_newinfo: |
1119 | xt_free_table_info(newinfo); | 1117 | xt_free_table_info(newinfo); |
1120 | return ret; | 1118 | return ret; |
1121 | } | 1119 | } |
1122 | 1120 | ||
1123 | /* We're lazy, and add to the first CPU; overflow works its fey magic | 1121 | static int do_add_counters(struct net *net, const void __user *user, |
1124 | * and everything is OK. */ | 1122 | unsigned int len, int compat) |
1125 | static int | ||
1126 | add_counter_to_entry(struct arpt_entry *e, | ||
1127 | const struct xt_counters addme[], | ||
1128 | unsigned int *i) | ||
1129 | { | ||
1130 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
1131 | |||
1132 | (*i)++; | ||
1133 | return 0; | ||
1134 | } | ||
1135 | |||
1136 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, | ||
1137 | int compat) | ||
1138 | { | 1123 | { |
1139 | unsigned int i, curcpu; | 1124 | unsigned int i, curcpu; |
1140 | struct xt_counters_info tmp; | 1125 | struct xt_counters_info tmp; |
@@ -1147,6 +1132,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1147 | const struct xt_table_info *private; | 1132 | const struct xt_table_info *private; |
1148 | int ret = 0; | 1133 | int ret = 0; |
1149 | void *loc_cpu_entry; | 1134 | void *loc_cpu_entry; |
1135 | struct arpt_entry *iter; | ||
1150 | #ifdef CONFIG_COMPAT | 1136 | #ifdef CONFIG_COMPAT |
1151 | struct compat_xt_counters_info compat_tmp; | 1137 | struct compat_xt_counters_info compat_tmp; |
1152 | 1138 | ||
@@ -1204,11 +1190,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1204 | curcpu = smp_processor_id(); | 1190 | curcpu = smp_processor_id(); |
1205 | loc_cpu_entry = private->entries[curcpu]; | 1191 | loc_cpu_entry = private->entries[curcpu]; |
1206 | xt_info_wrlock(curcpu); | 1192 | xt_info_wrlock(curcpu); |
1207 | ARPT_ENTRY_ITERATE(loc_cpu_entry, | 1193 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { |
1208 | private->size, | 1194 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); |
1209 | add_counter_to_entry, | 1195 | ++i; |
1210 | paddc, | 1196 | } |
1211 | &i); | ||
1212 | xt_info_wrunlock(curcpu); | 1197 | xt_info_wrunlock(curcpu); |
1213 | unlock_up_free: | 1198 | unlock_up_free: |
1214 | local_bh_enable(); | 1199 | local_bh_enable(); |
@@ -1221,28 +1206,22 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1221 | } | 1206 | } |
1222 | 1207 | ||
1223 | #ifdef CONFIG_COMPAT | 1208 | #ifdef CONFIG_COMPAT |
1224 | static inline int | 1209 | static inline void compat_release_entry(struct compat_arpt_entry *e) |
1225 | compat_release_entry(struct compat_arpt_entry *e, unsigned int *i) | ||
1226 | { | 1210 | { |
1227 | struct arpt_entry_target *t; | 1211 | struct arpt_entry_target *t; |
1228 | 1212 | ||
1229 | if (i && (*i)-- == 0) | ||
1230 | return 1; | ||
1231 | |||
1232 | t = compat_arpt_get_target(e); | 1213 | t = compat_arpt_get_target(e); |
1233 | module_put(t->u.kernel.target->me); | 1214 | module_put(t->u.kernel.target->me); |
1234 | return 0; | ||
1235 | } | 1215 | } |
1236 | 1216 | ||
1237 | static inline int | 1217 | static inline int |
1238 | check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | 1218 | check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, |
1239 | struct xt_table_info *newinfo, | 1219 | struct xt_table_info *newinfo, |
1240 | unsigned int *size, | 1220 | unsigned int *size, |
1241 | unsigned char *base, | 1221 | const unsigned char *base, |
1242 | unsigned char *limit, | 1222 | const unsigned char *limit, |
1243 | unsigned int *hook_entries, | 1223 | const unsigned int *hook_entries, |
1244 | unsigned int *underflows, | 1224 | const unsigned int *underflows, |
1245 | unsigned int *i, | ||
1246 | const char *name) | 1225 | const char *name) |
1247 | { | 1226 | { |
1248 | struct arpt_entry_target *t; | 1227 | struct arpt_entry_target *t; |
@@ -1251,8 +1230,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | |||
1251 | int ret, off, h; | 1230 | int ret, off, h; |
1252 | 1231 | ||
1253 | duprintf("check_compat_entry_size_and_hooks %p\n", e); | 1232 | duprintf("check_compat_entry_size_and_hooks %p\n", e); |
1254 | if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 | 1233 | if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || |
1255 | || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { | 1234 | (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { |
1256 | duprintf("Bad offset %p, limit = %p\n", e, limit); | 1235 | duprintf("Bad offset %p, limit = %p\n", e, limit); |
1257 | return -EINVAL; | 1236 | return -EINVAL; |
1258 | } | 1237 | } |
@@ -1302,8 +1281,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | |||
1302 | /* Clear counters and comefrom */ | 1281 | /* Clear counters and comefrom */ |
1303 | memset(&e->counters, 0, sizeof(e->counters)); | 1282 | memset(&e->counters, 0, sizeof(e->counters)); |
1304 | e->comefrom = 0; | 1283 | e->comefrom = 0; |
1305 | |||
1306 | (*i)++; | ||
1307 | return 0; | 1284 | return 0; |
1308 | 1285 | ||
1309 | release_target: | 1286 | release_target: |
@@ -1347,19 +1324,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, | |||
1347 | return ret; | 1324 | return ret; |
1348 | } | 1325 | } |
1349 | 1326 | ||
1350 | static inline int compat_check_entry(struct arpt_entry *e, const char *name, | ||
1351 | unsigned int *i) | ||
1352 | { | ||
1353 | int ret; | ||
1354 | |||
1355 | ret = check_target(e, name); | ||
1356 | if (ret) | ||
1357 | return ret; | ||
1358 | |||
1359 | (*i)++; | ||
1360 | return 0; | ||
1361 | } | ||
1362 | |||
1363 | static int translate_compat_table(const char *name, | 1327 | static int translate_compat_table(const char *name, |
1364 | unsigned int valid_hooks, | 1328 | unsigned int valid_hooks, |
1365 | struct xt_table_info **pinfo, | 1329 | struct xt_table_info **pinfo, |
@@ -1372,8 +1336,10 @@ static int translate_compat_table(const char *name, | |||
1372 | unsigned int i, j; | 1336 | unsigned int i, j; |
1373 | struct xt_table_info *newinfo, *info; | 1337 | struct xt_table_info *newinfo, *info; |
1374 | void *pos, *entry0, *entry1; | 1338 | void *pos, *entry0, *entry1; |
1339 | struct compat_arpt_entry *iter0; | ||
1340 | struct arpt_entry *iter1; | ||
1375 | unsigned int size; | 1341 | unsigned int size; |
1376 | int ret; | 1342 | int ret = 0; |
1377 | 1343 | ||
1378 | info = *pinfo; | 1344 | info = *pinfo; |
1379 | entry0 = *pentry0; | 1345 | entry0 = *pentry0; |
@@ -1390,13 +1356,17 @@ static int translate_compat_table(const char *name, | |||
1390 | j = 0; | 1356 | j = 0; |
1391 | xt_compat_lock(NFPROTO_ARP); | 1357 | xt_compat_lock(NFPROTO_ARP); |
1392 | /* Walk through entries, checking offsets. */ | 1358 | /* Walk through entries, checking offsets. */ |
1393 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, | 1359 | xt_entry_foreach(iter0, entry0, total_size) { |
1394 | check_compat_entry_size_and_hooks, | 1360 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
1395 | info, &size, entry0, | 1361 | entry0, |
1396 | entry0 + total_size, | 1362 | entry0 + total_size, |
1397 | hook_entries, underflows, &j, name); | 1363 | hook_entries, |
1398 | if (ret != 0) | 1364 | underflows, |
1399 | goto out_unlock; | 1365 | name); |
1366 | if (ret != 0) | ||
1367 | goto out_unlock; | ||
1368 | ++j; | ||
1369 | } | ||
1400 | 1370 | ||
1401 | ret = -EINVAL; | 1371 | ret = -EINVAL; |
1402 | if (j != number) { | 1372 | if (j != number) { |
@@ -1435,9 +1405,12 @@ static int translate_compat_table(const char *name, | |||
1435 | entry1 = newinfo->entries[raw_smp_processor_id()]; | 1405 | entry1 = newinfo->entries[raw_smp_processor_id()]; |
1436 | pos = entry1; | 1406 | pos = entry1; |
1437 | size = total_size; | 1407 | size = total_size; |
1438 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, | 1408 | xt_entry_foreach(iter0, entry0, total_size) { |
1439 | compat_copy_entry_from_user, | 1409 | ret = compat_copy_entry_from_user(iter0, &pos, &size, |
1440 | &pos, &size, name, newinfo, entry1); | 1410 | name, newinfo, entry1); |
1411 | if (ret != 0) | ||
1412 | break; | ||
1413 | } | ||
1441 | xt_compat_flush_offsets(NFPROTO_ARP); | 1414 | xt_compat_flush_offsets(NFPROTO_ARP); |
1442 | xt_compat_unlock(NFPROTO_ARP); | 1415 | xt_compat_unlock(NFPROTO_ARP); |
1443 | if (ret) | 1416 | if (ret) |
@@ -1448,13 +1421,32 @@ static int translate_compat_table(const char *name, | |||
1448 | goto free_newinfo; | 1421 | goto free_newinfo; |
1449 | 1422 | ||
1450 | i = 0; | 1423 | i = 0; |
1451 | ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, | 1424 | xt_entry_foreach(iter1, entry1, newinfo->size) { |
1452 | name, &i); | 1425 | ret = check_target(iter1, name); |
1426 | if (ret != 0) | ||
1427 | break; | ||
1428 | ++i; | ||
1429 | } | ||
1453 | if (ret) { | 1430 | if (ret) { |
1431 | /* | ||
1432 | * The first i matches need cleanup_entry (calls ->destroy) | ||
1433 | * because they had called ->check already. The other j-i | ||
1434 | * entries need only release. | ||
1435 | */ | ||
1436 | int skip = i; | ||
1454 | j -= i; | 1437 | j -= i; |
1455 | COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, | 1438 | xt_entry_foreach(iter0, entry0, newinfo->size) { |
1456 | compat_release_entry, &j); | 1439 | if (skip-- > 0) |
1457 | ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); | 1440 | continue; |
1441 | if (j-- == 0) | ||
1442 | break; | ||
1443 | compat_release_entry(iter0); | ||
1444 | } | ||
1445 | xt_entry_foreach(iter1, entry1, newinfo->size) { | ||
1446 | if (i-- == 0) | ||
1447 | break; | ||
1448 | cleanup_entry(iter1); | ||
1449 | } | ||
1458 | xt_free_table_info(newinfo); | 1450 | xt_free_table_info(newinfo); |
1459 | return ret; | 1451 | return ret; |
1460 | } | 1452 | } |
@@ -1472,7 +1464,11 @@ static int translate_compat_table(const char *name, | |||
1472 | free_newinfo: | 1464 | free_newinfo: |
1473 | xt_free_table_info(newinfo); | 1465 | xt_free_table_info(newinfo); |
1474 | out: | 1466 | out: |
1475 | COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); | 1467 | xt_entry_foreach(iter0, entry0, total_size) { |
1468 | if (j-- == 0) | ||
1469 | break; | ||
1470 | compat_release_entry(iter0); | ||
1471 | } | ||
1476 | return ret; | 1472 | return ret; |
1477 | out_unlock: | 1473 | out_unlock: |
1478 | xt_compat_flush_offsets(NFPROTO_ARP); | 1474 | xt_compat_flush_offsets(NFPROTO_ARP); |
@@ -1499,6 +1495,7 @@ static int compat_do_replace(struct net *net, void __user *user, | |||
1499 | struct compat_arpt_replace tmp; | 1495 | struct compat_arpt_replace tmp; |
1500 | struct xt_table_info *newinfo; | 1496 | struct xt_table_info *newinfo; |
1501 | void *loc_cpu_entry; | 1497 | void *loc_cpu_entry; |
1498 | struct arpt_entry *iter; | ||
1502 | 1499 | ||
1503 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1500 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1504 | return -EFAULT; | 1501 | return -EFAULT; |
@@ -1536,7 +1533,8 @@ static int compat_do_replace(struct net *net, void __user *user, | |||
1536 | return 0; | 1533 | return 0; |
1537 | 1534 | ||
1538 | free_newinfo_untrans: | 1535 | free_newinfo_untrans: |
1539 | ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1536 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
1537 | cleanup_entry(iter); | ||
1540 | free_newinfo: | 1538 | free_newinfo: |
1541 | xt_free_table_info(newinfo); | 1539 | xt_free_table_info(newinfo); |
1542 | return ret; | 1540 | return ret; |
@@ -1570,7 +1568,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, | |||
1570 | static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, | 1568 | static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, |
1571 | compat_uint_t *size, | 1569 | compat_uint_t *size, |
1572 | struct xt_counters *counters, | 1570 | struct xt_counters *counters, |
1573 | unsigned int *i) | 1571 | unsigned int i) |
1574 | { | 1572 | { |
1575 | struct arpt_entry_target *t; | 1573 | struct arpt_entry_target *t; |
1576 | struct compat_arpt_entry __user *ce; | 1574 | struct compat_arpt_entry __user *ce; |
@@ -1578,14 +1576,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, | |||
1578 | compat_uint_t origsize; | 1576 | compat_uint_t origsize; |
1579 | int ret; | 1577 | int ret; |
1580 | 1578 | ||
1581 | ret = -EFAULT; | ||
1582 | origsize = *size; | 1579 | origsize = *size; |
1583 | ce = (struct compat_arpt_entry __user *)*dstptr; | 1580 | ce = (struct compat_arpt_entry __user *)*dstptr; |
1584 | if (copy_to_user(ce, e, sizeof(struct arpt_entry))) | 1581 | if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || |
1585 | goto out; | 1582 | copy_to_user(&ce->counters, &counters[i], |
1586 | 1583 | sizeof(counters[i])) != 0) | |
1587 | if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) | 1584 | return -EFAULT; |
1588 | goto out; | ||
1589 | 1585 | ||
1590 | *dstptr += sizeof(struct compat_arpt_entry); | 1586 | *dstptr += sizeof(struct compat_arpt_entry); |
1591 | *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); | 1587 | *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); |
@@ -1595,18 +1591,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, | |||
1595 | t = arpt_get_target(e); | 1591 | t = arpt_get_target(e); |
1596 | ret = xt_compat_target_to_user(t, dstptr, size); | 1592 | ret = xt_compat_target_to_user(t, dstptr, size); |
1597 | if (ret) | 1593 | if (ret) |
1598 | goto out; | 1594 | return ret; |
1599 | ret = -EFAULT; | ||
1600 | next_offset = e->next_offset - (origsize - *size); | 1595 | next_offset = e->next_offset - (origsize - *size); |
1601 | if (put_user(target_offset, &ce->target_offset)) | 1596 | if (put_user(target_offset, &ce->target_offset) != 0 || |
1602 | goto out; | 1597 | put_user(next_offset, &ce->next_offset) != 0) |
1603 | if (put_user(next_offset, &ce->next_offset)) | 1598 | return -EFAULT; |
1604 | goto out; | ||
1605 | |||
1606 | (*i)++; | ||
1607 | return 0; | 1599 | return 0; |
1608 | out: | ||
1609 | return ret; | ||
1610 | } | 1600 | } |
1611 | 1601 | ||
1612 | static int compat_copy_entries_to_user(unsigned int total_size, | 1602 | static int compat_copy_entries_to_user(unsigned int total_size, |
@@ -1620,6 +1610,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, | |||
1620 | int ret = 0; | 1610 | int ret = 0; |
1621 | void *loc_cpu_entry; | 1611 | void *loc_cpu_entry; |
1622 | unsigned int i = 0; | 1612 | unsigned int i = 0; |
1613 | struct arpt_entry *iter; | ||
1623 | 1614 | ||
1624 | counters = alloc_counters(table); | 1615 | counters = alloc_counters(table); |
1625 | if (IS_ERR(counters)) | 1616 | if (IS_ERR(counters)) |
@@ -1629,9 +1620,12 @@ static int compat_copy_entries_to_user(unsigned int total_size, | |||
1629 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1620 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
1630 | pos = userptr; | 1621 | pos = userptr; |
1631 | size = total_size; | 1622 | size = total_size; |
1632 | ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size, | 1623 | xt_entry_foreach(iter, loc_cpu_entry, total_size) { |
1633 | compat_copy_entry_to_user, | 1624 | ret = compat_copy_entry_to_user(iter, &pos, |
1634 | &pos, &size, counters, &i); | 1625 | &size, counters, i++); |
1626 | if (ret != 0) | ||
1627 | break; | ||
1628 | } | ||
1635 | vfree(counters); | 1629 | vfree(counters); |
1636 | return ret; | 1630 | return ret; |
1637 | } | 1631 | } |
@@ -1799,12 +1793,7 @@ struct xt_table *arpt_register_table(struct net *net, | |||
1799 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | 1793 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; |
1800 | memcpy(loc_cpu_entry, repl->entries, repl->size); | 1794 | memcpy(loc_cpu_entry, repl->entries, repl->size); |
1801 | 1795 | ||
1802 | ret = translate_table(table->name, table->valid_hooks, | 1796 | ret = translate_table(newinfo, loc_cpu_entry, repl); |
1803 | newinfo, loc_cpu_entry, repl->size, | ||
1804 | repl->num_entries, | ||
1805 | repl->hook_entry, | ||
1806 | repl->underflow); | ||
1807 | |||
1808 | duprintf("arpt_register_table: translate table gives %d\n", ret); | 1797 | duprintf("arpt_register_table: translate table gives %d\n", ret); |
1809 | if (ret != 0) | 1798 | if (ret != 0) |
1810 | goto out_free; | 1799 | goto out_free; |
@@ -1827,13 +1816,14 @@ void arpt_unregister_table(struct xt_table *table) | |||
1827 | struct xt_table_info *private; | 1816 | struct xt_table_info *private; |
1828 | void *loc_cpu_entry; | 1817 | void *loc_cpu_entry; |
1829 | struct module *table_owner = table->me; | 1818 | struct module *table_owner = table->me; |
1819 | struct arpt_entry *iter; | ||
1830 | 1820 | ||
1831 | private = xt_unregister_table(table); | 1821 | private = xt_unregister_table(table); |
1832 | 1822 | ||
1833 | /* Decrease module usage counts and free resources */ | 1823 | /* Decrease module usage counts and free resources */ |
1834 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1824 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
1835 | ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, | 1825 | xt_entry_foreach(iter, loc_cpu_entry, private->size) |
1836 | cleanup_entry, NULL); | 1826 | cleanup_entry(iter); |
1837 | if (private->number > private->initial_entries) | 1827 | if (private->number > private->initial_entries) |
1838 | module_put(table_owner); | 1828 | module_put(table_owner); |
1839 | xt_free_table_info(private); | 1829 | xt_free_table_info(private); |
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 97337601827a..79ca5e70d497 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -6,7 +6,9 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/netfilter/x_tables.h> | ||
9 | #include <linux/netfilter_arp/arp_tables.h> | 10 | #include <linux/netfilter_arp/arp_tables.h> |
11 | #include <linux/slab.h> | ||
10 | 12 | ||
11 | MODULE_LICENSE("GPL"); | 13 | MODULE_LICENSE("GPL"); |
12 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); | 14 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); |
@@ -15,93 +17,37 @@ MODULE_DESCRIPTION("arptables filter table"); | |||
15 | #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ | 17 | #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ |
16 | (1 << NF_ARP_FORWARD)) | 18 | (1 << NF_ARP_FORWARD)) |
17 | 19 | ||
18 | static const struct | ||
19 | { | ||
20 | struct arpt_replace repl; | ||
21 | struct arpt_standard entries[3]; | ||
22 | struct arpt_error term; | ||
23 | } initial_table __net_initdata = { | ||
24 | .repl = { | ||
25 | .name = "filter", | ||
26 | .valid_hooks = FILTER_VALID_HOOKS, | ||
27 | .num_entries = 4, | ||
28 | .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), | ||
29 | .hook_entry = { | ||
30 | [NF_ARP_IN] = 0, | ||
31 | [NF_ARP_OUT] = sizeof(struct arpt_standard), | ||
32 | [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), | ||
33 | }, | ||
34 | .underflow = { | ||
35 | [NF_ARP_IN] = 0, | ||
36 | [NF_ARP_OUT] = sizeof(struct arpt_standard), | ||
37 | [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), | ||
38 | }, | ||
39 | }, | ||
40 | .entries = { | ||
41 | ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ | ||
42 | ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ | ||
43 | ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ | ||
44 | }, | ||
45 | .term = ARPT_ERROR_INIT, | ||
46 | }; | ||
47 | |||
48 | static const struct xt_table packet_filter = { | 20 | static const struct xt_table packet_filter = { |
49 | .name = "filter", | 21 | .name = "filter", |
50 | .valid_hooks = FILTER_VALID_HOOKS, | 22 | .valid_hooks = FILTER_VALID_HOOKS, |
51 | .me = THIS_MODULE, | 23 | .me = THIS_MODULE, |
52 | .af = NFPROTO_ARP, | 24 | .af = NFPROTO_ARP, |
25 | .priority = NF_IP_PRI_FILTER, | ||
53 | }; | 26 | }; |
54 | 27 | ||
55 | /* The work comes in here from netfilter.c */ | 28 | /* The work comes in here from netfilter.c */ |
56 | static unsigned int arpt_in_hook(unsigned int hook, | 29 | static unsigned int |
57 | struct sk_buff *skb, | 30 | arptable_filter_hook(unsigned int hook, struct sk_buff *skb, |
58 | const struct net_device *in, | 31 | const struct net_device *in, const struct net_device *out, |
59 | const struct net_device *out, | 32 | int (*okfn)(struct sk_buff *)) |
60 | int (*okfn)(struct sk_buff *)) | ||
61 | { | 33 | { |
62 | return arpt_do_table(skb, hook, in, out, | 34 | const struct net *net = dev_net((in != NULL) ? in : out); |
63 | dev_net(in)->ipv4.arptable_filter); | ||
64 | } | ||
65 | 35 | ||
66 | static unsigned int arpt_out_hook(unsigned int hook, | 36 | return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); |
67 | struct sk_buff *skb, | ||
68 | const struct net_device *in, | ||
69 | const struct net_device *out, | ||
70 | int (*okfn)(struct sk_buff *)) | ||
71 | { | ||
72 | return arpt_do_table(skb, hook, in, out, | ||
73 | dev_net(out)->ipv4.arptable_filter); | ||
74 | } | 37 | } |
75 | 38 | ||
76 | static struct nf_hook_ops arpt_ops[] __read_mostly = { | 39 | static struct nf_hook_ops *arpfilter_ops __read_mostly; |
77 | { | ||
78 | .hook = arpt_in_hook, | ||
79 | .owner = THIS_MODULE, | ||
80 | .pf = NFPROTO_ARP, | ||
81 | .hooknum = NF_ARP_IN, | ||
82 | .priority = NF_IP_PRI_FILTER, | ||
83 | }, | ||
84 | { | ||
85 | .hook = arpt_out_hook, | ||
86 | .owner = THIS_MODULE, | ||
87 | .pf = NFPROTO_ARP, | ||
88 | .hooknum = NF_ARP_OUT, | ||
89 | .priority = NF_IP_PRI_FILTER, | ||
90 | }, | ||
91 | { | ||
92 | .hook = arpt_in_hook, | ||
93 | .owner = THIS_MODULE, | ||
94 | .pf = NFPROTO_ARP, | ||
95 | .hooknum = NF_ARP_FORWARD, | ||
96 | .priority = NF_IP_PRI_FILTER, | ||
97 | }, | ||
98 | }; | ||
99 | 40 | ||
100 | static int __net_init arptable_filter_net_init(struct net *net) | 41 | static int __net_init arptable_filter_net_init(struct net *net) |
101 | { | 42 | { |
102 | /* Register table */ | 43 | struct arpt_replace *repl; |
44 | |||
45 | repl = arpt_alloc_initial_table(&packet_filter); | ||
46 | if (repl == NULL) | ||
47 | return -ENOMEM; | ||
103 | net->ipv4.arptable_filter = | 48 | net->ipv4.arptable_filter = |
104 | arpt_register_table(net, &packet_filter, &initial_table.repl); | 49 | arpt_register_table(net, &packet_filter, repl); |
50 | kfree(repl); | ||
105 | if (IS_ERR(net->ipv4.arptable_filter)) | 51 | if (IS_ERR(net->ipv4.arptable_filter)) |
106 | return PTR_ERR(net->ipv4.arptable_filter); | 52 | return PTR_ERR(net->ipv4.arptable_filter); |
107 | return 0; | 53 | return 0; |
@@ -125,9 +71,11 @@ static int __init arptable_filter_init(void) | |||
125 | if (ret < 0) | 71 | if (ret < 0) |
126 | return ret; | 72 | return ret; |
127 | 73 | ||
128 | ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); | 74 | arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); |
129 | if (ret < 0) | 75 | if (IS_ERR(arpfilter_ops)) { |
76 | ret = PTR_ERR(arpfilter_ops); | ||
130 | goto cleanup_table; | 77 | goto cleanup_table; |
78 | } | ||
131 | return ret; | 79 | return ret; |
132 | 80 | ||
133 | cleanup_table: | 81 | cleanup_table: |
@@ -137,7 +85,7 @@ cleanup_table: | |||
137 | 85 | ||
138 | static void __exit arptable_filter_fini(void) | 86 | static void __exit arptable_filter_fini(void) |
139 | { | 87 | { |
140 | nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); | 88 | xt_hook_unlink(&packet_filter, arpfilter_ops); |
141 | unregister_pernet_subsys(&arptable_filter_net_ops); | 89 | unregister_pernet_subsys(&arptable_filter_net_ops); |
142 | } | 90 | } |
143 | 91 | ||
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c156db215987..e2787048aa0a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
27 | #include <linux/net.h> | 27 | #include <linux/net.h> |
28 | #include <linux/mutex.h> | 28 | #include <linux/mutex.h> |
29 | #include <linux/slab.h> | ||
29 | #include <net/net_namespace.h> | 30 | #include <net/net_namespace.h> |
30 | #include <net/sock.h> | 31 | #include <net/sock.h> |
31 | #include <net/route.h> | 32 | #include <net/route.h> |
@@ -497,10 +498,9 @@ ipq_rcv_nl_event(struct notifier_block *this, | |||
497 | { | 498 | { |
498 | struct netlink_notify *n = ptr; | 499 | struct netlink_notify *n = ptr; |
499 | 500 | ||
500 | if (event == NETLINK_URELEASE && | 501 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { |
501 | n->protocol == NETLINK_FIREWALL && n->pid) { | ||
502 | write_lock_bh(&queue_lock); | 502 | write_lock_bh(&queue_lock); |
503 | if ((n->net == &init_net) && (n->pid == peer_pid)) | 503 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) |
504 | __ipq_reset(); | 504 | __ipq_reset(); |
505 | write_unlock_bh(&queue_lock); | 505 | write_unlock_bh(&queue_lock); |
506 | } | 506 | } |
@@ -516,14 +516,13 @@ static struct ctl_table_header *ipq_sysctl_header; | |||
516 | 516 | ||
517 | static ctl_table ipq_table[] = { | 517 | static ctl_table ipq_table[] = { |
518 | { | 518 | { |
519 | .ctl_name = NET_IPQ_QMAX, | ||
520 | .procname = NET_IPQ_QMAX_NAME, | 519 | .procname = NET_IPQ_QMAX_NAME, |
521 | .data = &queue_maxlen, | 520 | .data = &queue_maxlen, |
522 | .maxlen = sizeof(queue_maxlen), | 521 | .maxlen = sizeof(queue_maxlen), |
523 | .mode = 0644, | 522 | .mode = 0644, |
524 | .proc_handler = proc_dointvec | 523 | .proc_handler = proc_dointvec |
525 | }, | 524 | }, |
526 | { .ctl_name = 0 } | 525 | { } |
527 | }; | 526 | }; |
528 | #endif | 527 | #endif |
529 | 528 | ||
@@ -622,7 +621,7 @@ cleanup_netlink_notifier: | |||
622 | static void __exit ip_queue_fini(void) | 621 | static void __exit ip_queue_fini(void) |
623 | { | 622 | { |
624 | nf_unregister_queue_handlers(&nfqh); | 623 | nf_unregister_queue_handlers(&nfqh); |
625 | synchronize_net(); | 624 | |
626 | ipq_flush(NULL, 0); | 625 | ipq_flush(NULL, 0); |
627 | 626 | ||
628 | #ifdef CONFIG_SYSCTL | 627 | #ifdef CONFIG_SYSCTL |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cde755d5eeab..b29c66df8d1f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/netfilter/x_tables.h> | 28 | #include <linux/netfilter/x_tables.h> |
29 | #include <linux/netfilter_ipv4/ip_tables.h> | 29 | #include <linux/netfilter_ipv4/ip_tables.h> |
30 | #include <net/netfilter/nf_log.h> | 30 | #include <net/netfilter/nf_log.h> |
31 | #include "../../netfilter/xt_repldata.h" | ||
31 | 32 | ||
32 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
33 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 34 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
@@ -66,6 +67,12 @@ do { \ | |||
66 | #define inline | 67 | #define inline |
67 | #endif | 68 | #endif |
68 | 69 | ||
70 | void *ipt_alloc_initial_table(const struct xt_table *info) | ||
71 | { | ||
72 | return xt_alloc_initial_table(ipt, IPT); | ||
73 | } | ||
74 | EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); | ||
75 | |||
69 | /* | 76 | /* |
70 | We keep a set of rules for each CPU, so we can avoid write-locking | 77 | We keep a set of rules for each CPU, so we can avoid write-locking |
71 | them in the softirq when updating the counters and therefore | 78 | them in the softirq when updating the counters and therefore |
@@ -89,9 +96,9 @@ ip_packet_match(const struct iphdr *ip, | |||
89 | #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) | 96 | #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) |
90 | 97 | ||
91 | if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, | 98 | if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, |
92 | IPT_INV_SRCIP) | 99 | IPT_INV_SRCIP) || |
93 | || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, | 100 | FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, |
94 | IPT_INV_DSTIP)) { | 101 | IPT_INV_DSTIP)) { |
95 | dprintf("Source or dest mismatch.\n"); | 102 | dprintf("Source or dest mismatch.\n"); |
96 | 103 | ||
97 | dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", | 104 | dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", |
@@ -122,8 +129,8 @@ ip_packet_match(const struct iphdr *ip, | |||
122 | } | 129 | } |
123 | 130 | ||
124 | /* Check specific protocol */ | 131 | /* Check specific protocol */ |
125 | if (ipinfo->proto | 132 | if (ipinfo->proto && |
126 | && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { | 133 | FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { |
127 | dprintf("Packet protocol %hi does not match %hi.%s\n", | 134 | dprintf("Packet protocol %hi does not match %hi.%s\n", |
128 | ip->protocol, ipinfo->proto, | 135 | ip->protocol, ipinfo->proto, |
129 | ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); | 136 | ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); |
@@ -169,7 +176,7 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par) | |||
169 | 176 | ||
170 | /* Performance critical - called for every packet */ | 177 | /* Performance critical - called for every packet */ |
171 | static inline bool | 178 | static inline bool |
172 | do_match(struct ipt_entry_match *m, const struct sk_buff *skb, | 179 | do_match(const struct ipt_entry_match *m, const struct sk_buff *skb, |
173 | struct xt_match_param *par) | 180 | struct xt_match_param *par) |
174 | { | 181 | { |
175 | par->match = m->u.kernel.match; | 182 | par->match = m->u.kernel.match; |
@@ -184,7 +191,7 @@ do_match(struct ipt_entry_match *m, const struct sk_buff *skb, | |||
184 | 191 | ||
185 | /* Performance critical */ | 192 | /* Performance critical */ |
186 | static inline struct ipt_entry * | 193 | static inline struct ipt_entry * |
187 | get_entry(void *base, unsigned int offset) | 194 | get_entry(const void *base, unsigned int offset) |
188 | { | 195 | { |
189 | return (struct ipt_entry *)(base + offset); | 196 | return (struct ipt_entry *)(base + offset); |
190 | } | 197 | } |
@@ -199,6 +206,13 @@ static inline bool unconditional(const struct ipt_ip *ip) | |||
199 | #undef FWINV | 206 | #undef FWINV |
200 | } | 207 | } |
201 | 208 | ||
209 | /* for const-correctness */ | ||
210 | static inline const struct ipt_entry_target * | ||
211 | ipt_get_target_c(const struct ipt_entry *e) | ||
212 | { | ||
213 | return ipt_get_target((struct ipt_entry *)e); | ||
214 | } | ||
215 | |||
202 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ | 216 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ |
203 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) | 217 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) |
204 | static const char *const hooknames[] = { | 218 | static const char *const hooknames[] = { |
@@ -233,11 +247,11 @@ static struct nf_loginfo trace_loginfo = { | |||
233 | 247 | ||
234 | /* Mildly perf critical (only if packet tracing is on) */ | 248 | /* Mildly perf critical (only if packet tracing is on) */ |
235 | static inline int | 249 | static inline int |
236 | get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, | 250 | get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, |
237 | const char *hookname, const char **chainname, | 251 | const char *hookname, const char **chainname, |
238 | const char **comment, unsigned int *rulenum) | 252 | const char **comment, unsigned int *rulenum) |
239 | { | 253 | { |
240 | struct ipt_standard_target *t = (void *)ipt_get_target(s); | 254 | const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); |
241 | 255 | ||
242 | if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { | 256 | if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { |
243 | /* Head of user chain: ERROR target with chainname */ | 257 | /* Head of user chain: ERROR target with chainname */ |
@@ -246,11 +260,11 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, | |||
246 | } else if (s == e) { | 260 | } else if (s == e) { |
247 | (*rulenum)++; | 261 | (*rulenum)++; |
248 | 262 | ||
249 | if (s->target_offset == sizeof(struct ipt_entry) | 263 | if (s->target_offset == sizeof(struct ipt_entry) && |
250 | && strcmp(t->target.u.kernel.target->name, | 264 | strcmp(t->target.u.kernel.target->name, |
251 | IPT_STANDARD_TARGET) == 0 | 265 | IPT_STANDARD_TARGET) == 0 && |
252 | && t->verdict < 0 | 266 | t->verdict < 0 && |
253 | && unconditional(&s->ip)) { | 267 | unconditional(&s->ip)) { |
254 | /* Tail of chains: STANDARD target (return/policy) */ | 268 | /* Tail of chains: STANDARD target (return/policy) */ |
255 | *comment = *chainname == hookname | 269 | *comment = *chainname == hookname |
256 | ? comments[NF_IP_TRACE_COMMENT_POLICY] | 270 | ? comments[NF_IP_TRACE_COMMENT_POLICY] |
@@ -263,17 +277,18 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, | |||
263 | return 0; | 277 | return 0; |
264 | } | 278 | } |
265 | 279 | ||
266 | static void trace_packet(struct sk_buff *skb, | 280 | static void trace_packet(const struct sk_buff *skb, |
267 | unsigned int hook, | 281 | unsigned int hook, |
268 | const struct net_device *in, | 282 | const struct net_device *in, |
269 | const struct net_device *out, | 283 | const struct net_device *out, |
270 | const char *tablename, | 284 | const char *tablename, |
271 | struct xt_table_info *private, | 285 | const struct xt_table_info *private, |
272 | struct ipt_entry *e) | 286 | const struct ipt_entry *e) |
273 | { | 287 | { |
274 | void *table_base; | 288 | const void *table_base; |
275 | const struct ipt_entry *root; | 289 | const struct ipt_entry *root; |
276 | const char *hookname, *chainname, *comment; | 290 | const char *hookname, *chainname, *comment; |
291 | const struct ipt_entry *iter; | ||
277 | unsigned int rulenum = 0; | 292 | unsigned int rulenum = 0; |
278 | 293 | ||
279 | table_base = private->entries[smp_processor_id()]; | 294 | table_base = private->entries[smp_processor_id()]; |
@@ -282,10 +297,10 @@ static void trace_packet(struct sk_buff *skb, | |||
282 | hookname = chainname = hooknames[hook]; | 297 | hookname = chainname = hooknames[hook]; |
283 | comment = comments[NF_IP_TRACE_COMMENT_RULE]; | 298 | comment = comments[NF_IP_TRACE_COMMENT_RULE]; |
284 | 299 | ||
285 | IPT_ENTRY_ITERATE(root, | 300 | xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) |
286 | private->size - private->hook_entry[hook], | 301 | if (get_chainname_rulenum(iter, e, hookname, |
287 | get_chainname_rulenum, | 302 | &chainname, &comment, &rulenum) != 0) |
288 | e, hookname, &chainname, &comment, &rulenum); | 303 | break; |
289 | 304 | ||
290 | nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, | 305 | nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, |
291 | "TRACE: %s:%s:%s:%u ", | 306 | "TRACE: %s:%s:%s:%u ", |
@@ -315,9 +330,9 @@ ipt_do_table(struct sk_buff *skb, | |||
315 | /* Initializing verdict to NF_DROP keeps gcc happy. */ | 330 | /* Initializing verdict to NF_DROP keeps gcc happy. */ |
316 | unsigned int verdict = NF_DROP; | 331 | unsigned int verdict = NF_DROP; |
317 | const char *indev, *outdev; | 332 | const char *indev, *outdev; |
318 | void *table_base; | 333 | const void *table_base; |
319 | struct ipt_entry *e, *back; | 334 | struct ipt_entry *e, *back; |
320 | struct xt_table_info *private; | 335 | const struct xt_table_info *private; |
321 | struct xt_match_param mtpar; | 336 | struct xt_match_param mtpar; |
322 | struct xt_target_param tgpar; | 337 | struct xt_target_param tgpar; |
323 | 338 | ||
@@ -350,17 +365,22 @@ ipt_do_table(struct sk_buff *skb, | |||
350 | back = get_entry(table_base, private->underflow[hook]); | 365 | back = get_entry(table_base, private->underflow[hook]); |
351 | 366 | ||
352 | do { | 367 | do { |
353 | struct ipt_entry_target *t; | 368 | const struct ipt_entry_target *t; |
369 | const struct xt_entry_match *ematch; | ||
354 | 370 | ||
355 | IP_NF_ASSERT(e); | 371 | IP_NF_ASSERT(e); |
356 | IP_NF_ASSERT(back); | 372 | IP_NF_ASSERT(back); |
357 | if (!ip_packet_match(ip, indev, outdev, | 373 | if (!ip_packet_match(ip, indev, outdev, |
358 | &e->ip, mtpar.fragoff) || | 374 | &e->ip, mtpar.fragoff)) { |
359 | IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { | 375 | no_match: |
360 | e = ipt_next_entry(e); | 376 | e = ipt_next_entry(e); |
361 | continue; | 377 | continue; |
362 | } | 378 | } |
363 | 379 | ||
380 | xt_ematch_foreach(ematch, e) | ||
381 | if (do_match(ematch, skb, &mtpar) != 0) | ||
382 | goto no_match; | ||
383 | |||
364 | ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); | 384 | ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); |
365 | 385 | ||
366 | t = ipt_get_target(e); | 386 | t = ipt_get_target(e); |
@@ -388,8 +408,8 @@ ipt_do_table(struct sk_buff *skb, | |||
388 | back = get_entry(table_base, back->comefrom); | 408 | back = get_entry(table_base, back->comefrom); |
389 | continue; | 409 | continue; |
390 | } | 410 | } |
391 | if (table_base + v != ipt_next_entry(e) | 411 | if (table_base + v != ipt_next_entry(e) && |
392 | && !(e->ip.flags & IPT_F_GOTO)) { | 412 | !(e->ip.flags & IPT_F_GOTO)) { |
393 | /* Save old back ptr in next entry */ | 413 | /* Save old back ptr in next entry */ |
394 | struct ipt_entry *next = ipt_next_entry(e); | 414 | struct ipt_entry *next = ipt_next_entry(e); |
395 | next->comefrom = (void *)back - table_base; | 415 | next->comefrom = (void *)back - table_base; |
@@ -443,7 +463,7 @@ ipt_do_table(struct sk_buff *skb, | |||
443 | /* Figures out from what hook each rule can be called: returns 0 if | 463 | /* Figures out from what hook each rule can be called: returns 0 if |
444 | there are loops. Puts hook bitmask in comefrom. */ | 464 | there are loops. Puts hook bitmask in comefrom. */ |
445 | static int | 465 | static int |
446 | mark_source_chains(struct xt_table_info *newinfo, | 466 | mark_source_chains(const struct xt_table_info *newinfo, |
447 | unsigned int valid_hooks, void *entry0) | 467 | unsigned int valid_hooks, void *entry0) |
448 | { | 468 | { |
449 | unsigned int hook; | 469 | unsigned int hook; |
@@ -461,8 +481,8 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
461 | e->counters.pcnt = pos; | 481 | e->counters.pcnt = pos; |
462 | 482 | ||
463 | for (;;) { | 483 | for (;;) { |
464 | struct ipt_standard_target *t | 484 | const struct ipt_standard_target *t |
465 | = (void *)ipt_get_target(e); | 485 | = (void *)ipt_get_target_c(e); |
466 | int visited = e->comefrom & (1 << hook); | 486 | int visited = e->comefrom & (1 << hook); |
467 | 487 | ||
468 | if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { | 488 | if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { |
@@ -473,11 +493,11 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
473 | e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); | 493 | e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); |
474 | 494 | ||
475 | /* Unconditional return/END. */ | 495 | /* Unconditional return/END. */ |
476 | if ((e->target_offset == sizeof(struct ipt_entry) | 496 | if ((e->target_offset == sizeof(struct ipt_entry) && |
477 | && (strcmp(t->target.u.user.name, | 497 | (strcmp(t->target.u.user.name, |
478 | IPT_STANDARD_TARGET) == 0) | 498 | IPT_STANDARD_TARGET) == 0) && |
479 | && t->verdict < 0 | 499 | t->verdict < 0 && unconditional(&e->ip)) || |
480 | && unconditional(&e->ip)) || visited) { | 500 | visited) { |
481 | unsigned int oldpos, size; | 501 | unsigned int oldpos, size; |
482 | 502 | ||
483 | if ((strcmp(t->target.u.user.name, | 503 | if ((strcmp(t->target.u.user.name, |
@@ -524,8 +544,8 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
524 | int newpos = t->verdict; | 544 | int newpos = t->verdict; |
525 | 545 | ||
526 | if (strcmp(t->target.u.user.name, | 546 | if (strcmp(t->target.u.user.name, |
527 | IPT_STANDARD_TARGET) == 0 | 547 | IPT_STANDARD_TARGET) == 0 && |
528 | && newpos >= 0) { | 548 | newpos >= 0) { |
529 | if (newpos > newinfo->size - | 549 | if (newpos > newinfo->size - |
530 | sizeof(struct ipt_entry)) { | 550 | sizeof(struct ipt_entry)) { |
531 | duprintf("mark_source_chains: " | 551 | duprintf("mark_source_chains: " |
@@ -552,27 +572,23 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
552 | return 1; | 572 | return 1; |
553 | } | 573 | } |
554 | 574 | ||
555 | static int | 575 | static void cleanup_match(struct ipt_entry_match *m, struct net *net) |
556 | cleanup_match(struct ipt_entry_match *m, unsigned int *i) | ||
557 | { | 576 | { |
558 | struct xt_mtdtor_param par; | 577 | struct xt_mtdtor_param par; |
559 | 578 | ||
560 | if (i && (*i)-- == 0) | 579 | par.net = net; |
561 | return 1; | ||
562 | |||
563 | par.match = m->u.kernel.match; | 580 | par.match = m->u.kernel.match; |
564 | par.matchinfo = m->data; | 581 | par.matchinfo = m->data; |
565 | par.family = NFPROTO_IPV4; | 582 | par.family = NFPROTO_IPV4; |
566 | if (par.match->destroy != NULL) | 583 | if (par.match->destroy != NULL) |
567 | par.match->destroy(&par); | 584 | par.match->destroy(&par); |
568 | module_put(par.match->me); | 585 | module_put(par.match->me); |
569 | return 0; | ||
570 | } | 586 | } |
571 | 587 | ||
572 | static int | 588 | static int |
573 | check_entry(struct ipt_entry *e, const char *name) | 589 | check_entry(const struct ipt_entry *e, const char *name) |
574 | { | 590 | { |
575 | struct ipt_entry_target *t; | 591 | const struct ipt_entry_target *t; |
576 | 592 | ||
577 | if (!ip_checkentry(&e->ip)) { | 593 | if (!ip_checkentry(&e->ip)) { |
578 | duprintf("ip_tables: ip check failed %p %s.\n", e, name); | 594 | duprintf("ip_tables: ip check failed %p %s.\n", e, name); |
@@ -583,7 +599,7 @@ check_entry(struct ipt_entry *e, const char *name) | |||
583 | e->next_offset) | 599 | e->next_offset) |
584 | return -EINVAL; | 600 | return -EINVAL; |
585 | 601 | ||
586 | t = ipt_get_target(e); | 602 | t = ipt_get_target_c(e); |
587 | if (e->target_offset + t->u.target_size > e->next_offset) | 603 | if (e->target_offset + t->u.target_size > e->next_offset) |
588 | return -EINVAL; | 604 | return -EINVAL; |
589 | 605 | ||
@@ -591,8 +607,7 @@ check_entry(struct ipt_entry *e, const char *name) | |||
591 | } | 607 | } |
592 | 608 | ||
593 | static int | 609 | static int |
594 | check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | 610 | check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) |
595 | unsigned int *i) | ||
596 | { | 611 | { |
597 | const struct ipt_ip *ip = par->entryinfo; | 612 | const struct ipt_ip *ip = par->entryinfo; |
598 | int ret; | 613 | int ret; |
@@ -607,13 +622,11 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | |||
607 | par.match->name); | 622 | par.match->name); |
608 | return ret; | 623 | return ret; |
609 | } | 624 | } |
610 | ++*i; | ||
611 | return 0; | 625 | return 0; |
612 | } | 626 | } |
613 | 627 | ||
614 | static int | 628 | static int |
615 | find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | 629 | find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) |
616 | unsigned int *i) | ||
617 | { | 630 | { |
618 | struct xt_match *match; | 631 | struct xt_match *match; |
619 | int ret; | 632 | int ret; |
@@ -627,7 +640,7 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | |||
627 | } | 640 | } |
628 | m->u.kernel.match = match; | 641 | m->u.kernel.match = match; |
629 | 642 | ||
630 | ret = check_match(m, par, i); | 643 | ret = check_match(m, par); |
631 | if (ret) | 644 | if (ret) |
632 | goto err; | 645 | goto err; |
633 | 646 | ||
@@ -637,10 +650,11 @@ err: | |||
637 | return ret; | 650 | return ret; |
638 | } | 651 | } |
639 | 652 | ||
640 | static int check_target(struct ipt_entry *e, const char *name) | 653 | static int check_target(struct ipt_entry *e, struct net *net, const char *name) |
641 | { | 654 | { |
642 | struct ipt_entry_target *t = ipt_get_target(e); | 655 | struct ipt_entry_target *t = ipt_get_target(e); |
643 | struct xt_tgchk_param par = { | 656 | struct xt_tgchk_param par = { |
657 | .net = net, | ||
644 | .table = name, | 658 | .table = name, |
645 | .entryinfo = e, | 659 | .entryinfo = e, |
646 | .target = t->u.kernel.target, | 660 | .target = t->u.kernel.target, |
@@ -661,27 +675,32 @@ static int check_target(struct ipt_entry *e, const char *name) | |||
661 | } | 675 | } |
662 | 676 | ||
663 | static int | 677 | static int |
664 | find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, | 678 | find_check_entry(struct ipt_entry *e, struct net *net, const char *name, |
665 | unsigned int *i) | 679 | unsigned int size) |
666 | { | 680 | { |
667 | struct ipt_entry_target *t; | 681 | struct ipt_entry_target *t; |
668 | struct xt_target *target; | 682 | struct xt_target *target; |
669 | int ret; | 683 | int ret; |
670 | unsigned int j; | 684 | unsigned int j; |
671 | struct xt_mtchk_param mtpar; | 685 | struct xt_mtchk_param mtpar; |
686 | struct xt_entry_match *ematch; | ||
672 | 687 | ||
673 | ret = check_entry(e, name); | 688 | ret = check_entry(e, name); |
674 | if (ret) | 689 | if (ret) |
675 | return ret; | 690 | return ret; |
676 | 691 | ||
677 | j = 0; | 692 | j = 0; |
693 | mtpar.net = net; | ||
678 | mtpar.table = name; | 694 | mtpar.table = name; |
679 | mtpar.entryinfo = &e->ip; | 695 | mtpar.entryinfo = &e->ip; |
680 | mtpar.hook_mask = e->comefrom; | 696 | mtpar.hook_mask = e->comefrom; |
681 | mtpar.family = NFPROTO_IPV4; | 697 | mtpar.family = NFPROTO_IPV4; |
682 | ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); | 698 | xt_ematch_foreach(ematch, e) { |
683 | if (ret != 0) | 699 | ret = find_check_match(ematch, &mtpar); |
684 | goto cleanup_matches; | 700 | if (ret != 0) |
701 | goto cleanup_matches; | ||
702 | ++j; | ||
703 | } | ||
685 | 704 | ||
686 | t = ipt_get_target(e); | 705 | t = ipt_get_target(e); |
687 | target = try_then_request_module(xt_find_target(AF_INET, | 706 | target = try_then_request_module(xt_find_target(AF_INET, |
@@ -695,27 +714,29 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, | |||
695 | } | 714 | } |
696 | t->u.kernel.target = target; | 715 | t->u.kernel.target = target; |
697 | 716 | ||
698 | ret = check_target(e, name); | 717 | ret = check_target(e, net, name); |
699 | if (ret) | 718 | if (ret) |
700 | goto err; | 719 | goto err; |
701 | |||
702 | (*i)++; | ||
703 | return 0; | 720 | return 0; |
704 | err: | 721 | err: |
705 | module_put(t->u.kernel.target->me); | 722 | module_put(t->u.kernel.target->me); |
706 | cleanup_matches: | 723 | cleanup_matches: |
707 | IPT_MATCH_ITERATE(e, cleanup_match, &j); | 724 | xt_ematch_foreach(ematch, e) { |
725 | if (j-- == 0) | ||
726 | break; | ||
727 | cleanup_match(ematch, net); | ||
728 | } | ||
708 | return ret; | 729 | return ret; |
709 | } | 730 | } |
710 | 731 | ||
711 | static bool check_underflow(struct ipt_entry *e) | 732 | static bool check_underflow(const struct ipt_entry *e) |
712 | { | 733 | { |
713 | const struct ipt_entry_target *t; | 734 | const struct ipt_entry_target *t; |
714 | unsigned int verdict; | 735 | unsigned int verdict; |
715 | 736 | ||
716 | if (!unconditional(&e->ip)) | 737 | if (!unconditional(&e->ip)) |
717 | return false; | 738 | return false; |
718 | t = ipt_get_target(e); | 739 | t = ipt_get_target_c(e); |
719 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) | 740 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) |
720 | return false; | 741 | return false; |
721 | verdict = ((struct ipt_standard_target *)t)->verdict; | 742 | verdict = ((struct ipt_standard_target *)t)->verdict; |
@@ -726,17 +747,16 @@ static bool check_underflow(struct ipt_entry *e) | |||
726 | static int | 747 | static int |
727 | check_entry_size_and_hooks(struct ipt_entry *e, | 748 | check_entry_size_and_hooks(struct ipt_entry *e, |
728 | struct xt_table_info *newinfo, | 749 | struct xt_table_info *newinfo, |
729 | unsigned char *base, | 750 | const unsigned char *base, |
730 | unsigned char *limit, | 751 | const unsigned char *limit, |
731 | const unsigned int *hook_entries, | 752 | const unsigned int *hook_entries, |
732 | const unsigned int *underflows, | 753 | const unsigned int *underflows, |
733 | unsigned int valid_hooks, | 754 | unsigned int valid_hooks) |
734 | unsigned int *i) | ||
735 | { | 755 | { |
736 | unsigned int h; | 756 | unsigned int h; |
737 | 757 | ||
738 | if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 | 758 | if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || |
739 | || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { | 759 | (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { |
740 | duprintf("Bad offset %p\n", e); | 760 | duprintf("Bad offset %p\n", e); |
741 | return -EINVAL; | 761 | return -EINVAL; |
742 | } | 762 | } |
@@ -768,50 +788,42 @@ check_entry_size_and_hooks(struct ipt_entry *e, | |||
768 | /* Clear counters and comefrom */ | 788 | /* Clear counters and comefrom */ |
769 | e->counters = ((struct xt_counters) { 0, 0 }); | 789 | e->counters = ((struct xt_counters) { 0, 0 }); |
770 | e->comefrom = 0; | 790 | e->comefrom = 0; |
771 | |||
772 | (*i)++; | ||
773 | return 0; | 791 | return 0; |
774 | } | 792 | } |
775 | 793 | ||
776 | static int | 794 | static void |
777 | cleanup_entry(struct ipt_entry *e, unsigned int *i) | 795 | cleanup_entry(struct ipt_entry *e, struct net *net) |
778 | { | 796 | { |
779 | struct xt_tgdtor_param par; | 797 | struct xt_tgdtor_param par; |
780 | struct ipt_entry_target *t; | 798 | struct ipt_entry_target *t; |
781 | 799 | struct xt_entry_match *ematch; | |
782 | if (i && (*i)-- == 0) | ||
783 | return 1; | ||
784 | 800 | ||
785 | /* Cleanup all matches */ | 801 | /* Cleanup all matches */ |
786 | IPT_MATCH_ITERATE(e, cleanup_match, NULL); | 802 | xt_ematch_foreach(ematch, e) |
803 | cleanup_match(ematch, net); | ||
787 | t = ipt_get_target(e); | 804 | t = ipt_get_target(e); |
788 | 805 | ||
806 | par.net = net; | ||
789 | par.target = t->u.kernel.target; | 807 | par.target = t->u.kernel.target; |
790 | par.targinfo = t->data; | 808 | par.targinfo = t->data; |
791 | par.family = NFPROTO_IPV4; | 809 | par.family = NFPROTO_IPV4; |
792 | if (par.target->destroy != NULL) | 810 | if (par.target->destroy != NULL) |
793 | par.target->destroy(&par); | 811 | par.target->destroy(&par); |
794 | module_put(par.target->me); | 812 | module_put(par.target->me); |
795 | return 0; | ||
796 | } | 813 | } |
797 | 814 | ||
798 | /* Checks and translates the user-supplied table segment (held in | 815 | /* Checks and translates the user-supplied table segment (held in |
799 | newinfo) */ | 816 | newinfo) */ |
800 | static int | 817 | static int |
801 | translate_table(const char *name, | 818 | translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, |
802 | unsigned int valid_hooks, | 819 | const struct ipt_replace *repl) |
803 | struct xt_table_info *newinfo, | ||
804 | void *entry0, | ||
805 | unsigned int size, | ||
806 | unsigned int number, | ||
807 | const unsigned int *hook_entries, | ||
808 | const unsigned int *underflows) | ||
809 | { | 820 | { |
821 | struct ipt_entry *iter; | ||
810 | unsigned int i; | 822 | unsigned int i; |
811 | int ret; | 823 | int ret = 0; |
812 | 824 | ||
813 | newinfo->size = size; | 825 | newinfo->size = repl->size; |
814 | newinfo->number = number; | 826 | newinfo->number = repl->num_entries; |
815 | 827 | ||
816 | /* Init all hooks to impossible value. */ | 828 | /* Init all hooks to impossible value. */ |
817 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { | 829 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { |
@@ -822,49 +834,58 @@ translate_table(const char *name, | |||
822 | duprintf("translate_table: size %u\n", newinfo->size); | 834 | duprintf("translate_table: size %u\n", newinfo->size); |
823 | i = 0; | 835 | i = 0; |
824 | /* Walk through entries, checking offsets. */ | 836 | /* Walk through entries, checking offsets. */ |
825 | ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, | 837 | xt_entry_foreach(iter, entry0, newinfo->size) { |
826 | check_entry_size_and_hooks, | 838 | ret = check_entry_size_and_hooks(iter, newinfo, entry0, |
827 | newinfo, | 839 | entry0 + repl->size, |
828 | entry0, | 840 | repl->hook_entry, |
829 | entry0 + size, | 841 | repl->underflow, |
830 | hook_entries, underflows, valid_hooks, &i); | 842 | repl->valid_hooks); |
831 | if (ret != 0) | 843 | if (ret != 0) |
832 | return ret; | 844 | return ret; |
845 | ++i; | ||
846 | } | ||
833 | 847 | ||
834 | if (i != number) { | 848 | if (i != repl->num_entries) { |
835 | duprintf("translate_table: %u not %u entries\n", | 849 | duprintf("translate_table: %u not %u entries\n", |
836 | i, number); | 850 | i, repl->num_entries); |
837 | return -EINVAL; | 851 | return -EINVAL; |
838 | } | 852 | } |
839 | 853 | ||
840 | /* Check hooks all assigned */ | 854 | /* Check hooks all assigned */ |
841 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { | 855 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { |
842 | /* Only hooks which are valid */ | 856 | /* Only hooks which are valid */ |
843 | if (!(valid_hooks & (1 << i))) | 857 | if (!(repl->valid_hooks & (1 << i))) |
844 | continue; | 858 | continue; |
845 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { | 859 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { |
846 | duprintf("Invalid hook entry %u %u\n", | 860 | duprintf("Invalid hook entry %u %u\n", |
847 | i, hook_entries[i]); | 861 | i, repl->hook_entry[i]); |
848 | return -EINVAL; | 862 | return -EINVAL; |
849 | } | 863 | } |
850 | if (newinfo->underflow[i] == 0xFFFFFFFF) { | 864 | if (newinfo->underflow[i] == 0xFFFFFFFF) { |
851 | duprintf("Invalid underflow %u %u\n", | 865 | duprintf("Invalid underflow %u %u\n", |
852 | i, underflows[i]); | 866 | i, repl->underflow[i]); |
853 | return -EINVAL; | 867 | return -EINVAL; |
854 | } | 868 | } |
855 | } | 869 | } |
856 | 870 | ||
857 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) | 871 | if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) |
858 | return -ELOOP; | 872 | return -ELOOP; |
859 | 873 | ||
860 | /* Finally, each sanity check must pass */ | 874 | /* Finally, each sanity check must pass */ |
861 | i = 0; | 875 | i = 0; |
862 | ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, | 876 | xt_entry_foreach(iter, entry0, newinfo->size) { |
863 | find_check_entry, name, size, &i); | 877 | ret = find_check_entry(iter, net, repl->name, repl->size); |
878 | if (ret != 0) | ||
879 | break; | ||
880 | ++i; | ||
881 | } | ||
864 | 882 | ||
865 | if (ret != 0) { | 883 | if (ret != 0) { |
866 | IPT_ENTRY_ITERATE(entry0, newinfo->size, | 884 | xt_entry_foreach(iter, entry0, newinfo->size) { |
867 | cleanup_entry, &i); | 885 | if (i-- == 0) |
886 | break; | ||
887 | cleanup_entry(iter, net); | ||
888 | } | ||
868 | return ret; | 889 | return ret; |
869 | } | 890 | } |
870 | 891 | ||
@@ -877,33 +898,11 @@ translate_table(const char *name, | |||
877 | return ret; | 898 | return ret; |
878 | } | 899 | } |
879 | 900 | ||
880 | /* Gets counters. */ | ||
881 | static inline int | ||
882 | add_entry_to_counter(const struct ipt_entry *e, | ||
883 | struct xt_counters total[], | ||
884 | unsigned int *i) | ||
885 | { | ||
886 | ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
887 | |||
888 | (*i)++; | ||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | static inline int | ||
893 | set_entry_to_counter(const struct ipt_entry *e, | ||
894 | struct ipt_counters total[], | ||
895 | unsigned int *i) | ||
896 | { | ||
897 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
898 | |||
899 | (*i)++; | ||
900 | return 0; | ||
901 | } | ||
902 | |||
903 | static void | 901 | static void |
904 | get_counters(const struct xt_table_info *t, | 902 | get_counters(const struct xt_table_info *t, |
905 | struct xt_counters counters[]) | 903 | struct xt_counters counters[]) |
906 | { | 904 | { |
905 | struct ipt_entry *iter; | ||
907 | unsigned int cpu; | 906 | unsigned int cpu; |
908 | unsigned int i; | 907 | unsigned int i; |
909 | unsigned int curcpu; | 908 | unsigned int curcpu; |
@@ -919,32 +918,32 @@ get_counters(const struct xt_table_info *t, | |||
919 | curcpu = smp_processor_id(); | 918 | curcpu = smp_processor_id(); |
920 | 919 | ||
921 | i = 0; | 920 | i = 0; |
922 | IPT_ENTRY_ITERATE(t->entries[curcpu], | 921 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { |
923 | t->size, | 922 | SET_COUNTER(counters[i], iter->counters.bcnt, |
924 | set_entry_to_counter, | 923 | iter->counters.pcnt); |
925 | counters, | 924 | ++i; |
926 | &i); | 925 | } |
927 | 926 | ||
928 | for_each_possible_cpu(cpu) { | 927 | for_each_possible_cpu(cpu) { |
929 | if (cpu == curcpu) | 928 | if (cpu == curcpu) |
930 | continue; | 929 | continue; |
931 | i = 0; | 930 | i = 0; |
932 | xt_info_wrlock(cpu); | 931 | xt_info_wrlock(cpu); |
933 | IPT_ENTRY_ITERATE(t->entries[cpu], | 932 | xt_entry_foreach(iter, t->entries[cpu], t->size) { |
934 | t->size, | 933 | ADD_COUNTER(counters[i], iter->counters.bcnt, |
935 | add_entry_to_counter, | 934 | iter->counters.pcnt); |
936 | counters, | 935 | ++i; /* macro does multi eval of i */ |
937 | &i); | 936 | } |
938 | xt_info_wrunlock(cpu); | 937 | xt_info_wrunlock(cpu); |
939 | } | 938 | } |
940 | local_bh_enable(); | 939 | local_bh_enable(); |
941 | } | 940 | } |
942 | 941 | ||
943 | static struct xt_counters * alloc_counters(struct xt_table *table) | 942 | static struct xt_counters *alloc_counters(const struct xt_table *table) |
944 | { | 943 | { |
945 | unsigned int countersize; | 944 | unsigned int countersize; |
946 | struct xt_counters *counters; | 945 | struct xt_counters *counters; |
947 | struct xt_table_info *private = table->private; | 946 | const struct xt_table_info *private = table->private; |
948 | 947 | ||
949 | /* We need atomic snapshot of counters: rest doesn't change | 948 | /* We need atomic snapshot of counters: rest doesn't change |
950 | (other than comefrom, which userspace doesn't care | 949 | (other than comefrom, which userspace doesn't care |
@@ -962,11 +961,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) | |||
962 | 961 | ||
963 | static int | 962 | static int |
964 | copy_entries_to_user(unsigned int total_size, | 963 | copy_entries_to_user(unsigned int total_size, |
965 | struct xt_table *table, | 964 | const struct xt_table *table, |
966 | void __user *userptr) | 965 | void __user *userptr) |
967 | { | 966 | { |
968 | unsigned int off, num; | 967 | unsigned int off, num; |
969 | struct ipt_entry *e; | 968 | const struct ipt_entry *e; |
970 | struct xt_counters *counters; | 969 | struct xt_counters *counters; |
971 | const struct xt_table_info *private = table->private; | 970 | const struct xt_table_info *private = table->private; |
972 | int ret = 0; | 971 | int ret = 0; |
@@ -1018,7 +1017,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1018 | } | 1017 | } |
1019 | } | 1018 | } |
1020 | 1019 | ||
1021 | t = ipt_get_target(e); | 1020 | t = ipt_get_target_c(e); |
1022 | if (copy_to_user(userptr + off + e->target_offset | 1021 | if (copy_to_user(userptr + off + e->target_offset |
1023 | + offsetof(struct ipt_entry_target, | 1022 | + offsetof(struct ipt_entry_target, |
1024 | u.user.name), | 1023 | u.user.name), |
@@ -1035,7 +1034,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1035 | } | 1034 | } |
1036 | 1035 | ||
1037 | #ifdef CONFIG_COMPAT | 1036 | #ifdef CONFIG_COMPAT |
1038 | static void compat_standard_from_user(void *dst, void *src) | 1037 | static void compat_standard_from_user(void *dst, const void *src) |
1039 | { | 1038 | { |
1040 | int v = *(compat_int_t *)src; | 1039 | int v = *(compat_int_t *)src; |
1041 | 1040 | ||
@@ -1044,7 +1043,7 @@ static void compat_standard_from_user(void *dst, void *src) | |||
1044 | memcpy(dst, &v, sizeof(v)); | 1043 | memcpy(dst, &v, sizeof(v)); |
1045 | } | 1044 | } |
1046 | 1045 | ||
1047 | static int compat_standard_to_user(void __user *dst, void *src) | 1046 | static int compat_standard_to_user(void __user *dst, const void *src) |
1048 | { | 1047 | { |
1049 | compat_int_t cv = *(int *)src; | 1048 | compat_int_t cv = *(int *)src; |
1050 | 1049 | ||
@@ -1053,25 +1052,20 @@ static int compat_standard_to_user(void __user *dst, void *src) | |||
1053 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; | 1052 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; |
1054 | } | 1053 | } |
1055 | 1054 | ||
1056 | static inline int | 1055 | static int compat_calc_entry(const struct ipt_entry *e, |
1057 | compat_calc_match(struct ipt_entry_match *m, int *size) | ||
1058 | { | ||
1059 | *size += xt_compat_match_offset(m->u.kernel.match); | ||
1060 | return 0; | ||
1061 | } | ||
1062 | |||
1063 | static int compat_calc_entry(struct ipt_entry *e, | ||
1064 | const struct xt_table_info *info, | 1056 | const struct xt_table_info *info, |
1065 | void *base, struct xt_table_info *newinfo) | 1057 | const void *base, struct xt_table_info *newinfo) |
1066 | { | 1058 | { |
1067 | struct ipt_entry_target *t; | 1059 | const struct xt_entry_match *ematch; |
1060 | const struct ipt_entry_target *t; | ||
1068 | unsigned int entry_offset; | 1061 | unsigned int entry_offset; |
1069 | int off, i, ret; | 1062 | int off, i, ret; |
1070 | 1063 | ||
1071 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1064 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
1072 | entry_offset = (void *)e - base; | 1065 | entry_offset = (void *)e - base; |
1073 | IPT_MATCH_ITERATE(e, compat_calc_match, &off); | 1066 | xt_ematch_foreach(ematch, e) |
1074 | t = ipt_get_target(e); | 1067 | off += xt_compat_match_offset(ematch->u.kernel.match); |
1068 | t = ipt_get_target_c(e); | ||
1075 | off += xt_compat_target_offset(t->u.kernel.target); | 1069 | off += xt_compat_target_offset(t->u.kernel.target); |
1076 | newinfo->size -= off; | 1070 | newinfo->size -= off; |
1077 | ret = xt_compat_add_offset(AF_INET, entry_offset, off); | 1071 | ret = xt_compat_add_offset(AF_INET, entry_offset, off); |
@@ -1092,7 +1086,9 @@ static int compat_calc_entry(struct ipt_entry *e, | |||
1092 | static int compat_table_info(const struct xt_table_info *info, | 1086 | static int compat_table_info(const struct xt_table_info *info, |
1093 | struct xt_table_info *newinfo) | 1087 | struct xt_table_info *newinfo) |
1094 | { | 1088 | { |
1089 | struct ipt_entry *iter; | ||
1095 | void *loc_cpu_entry; | 1090 | void *loc_cpu_entry; |
1091 | int ret; | ||
1096 | 1092 | ||
1097 | if (!newinfo || !info) | 1093 | if (!newinfo || !info) |
1098 | return -EINVAL; | 1094 | return -EINVAL; |
@@ -1101,13 +1097,17 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1101 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1097 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1102 | newinfo->initial_entries = 0; | 1098 | newinfo->initial_entries = 0; |
1103 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1099 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1104 | return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, | 1100 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1105 | compat_calc_entry, info, loc_cpu_entry, | 1101 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1106 | newinfo); | 1102 | if (ret != 0) |
1103 | return ret; | ||
1104 | } | ||
1105 | return 0; | ||
1107 | } | 1106 | } |
1108 | #endif | 1107 | #endif |
1109 | 1108 | ||
1110 | static int get_info(struct net *net, void __user *user, int *len, int compat) | 1109 | static int get_info(struct net *net, void __user *user, |
1110 | const int *len, int compat) | ||
1111 | { | 1111 | { |
1112 | char name[IPT_TABLE_MAXNAMELEN]; | 1112 | char name[IPT_TABLE_MAXNAMELEN]; |
1113 | struct xt_table *t; | 1113 | struct xt_table *t; |
@@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
1132 | if (t && !IS_ERR(t)) { | 1132 | if (t && !IS_ERR(t)) { |
1133 | struct ipt_getinfo info; | 1133 | struct ipt_getinfo info; |
1134 | const struct xt_table_info *private = t->private; | 1134 | const struct xt_table_info *private = t->private; |
1135 | |||
1136 | #ifdef CONFIG_COMPAT | 1135 | #ifdef CONFIG_COMPAT |
1136 | struct xt_table_info tmp; | ||
1137 | |||
1137 | if (compat) { | 1138 | if (compat) { |
1138 | struct xt_table_info tmp; | ||
1139 | ret = compat_table_info(private, &tmp); | 1139 | ret = compat_table_info(private, &tmp); |
1140 | xt_compat_flush_offsets(AF_INET); | 1140 | xt_compat_flush_offsets(AF_INET); |
1141 | private = &tmp; | 1141 | private = &tmp; |
@@ -1167,7 +1167,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
1167 | } | 1167 | } |
1168 | 1168 | ||
1169 | static int | 1169 | static int |
1170 | get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) | 1170 | get_entries(struct net *net, struct ipt_get_entries __user *uptr, |
1171 | const int *len) | ||
1171 | { | 1172 | { |
1172 | int ret; | 1173 | int ret; |
1173 | struct ipt_get_entries get; | 1174 | struct ipt_get_entries get; |
@@ -1215,6 +1216,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
1215 | struct xt_table_info *oldinfo; | 1216 | struct xt_table_info *oldinfo; |
1216 | struct xt_counters *counters; | 1217 | struct xt_counters *counters; |
1217 | void *loc_cpu_old_entry; | 1218 | void *loc_cpu_old_entry; |
1219 | struct ipt_entry *iter; | ||
1218 | 1220 | ||
1219 | ret = 0; | 1221 | ret = 0; |
1220 | counters = vmalloc(num_counters * sizeof(struct xt_counters)); | 1222 | counters = vmalloc(num_counters * sizeof(struct xt_counters)); |
@@ -1257,8 +1259,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
1257 | 1259 | ||
1258 | /* Decrease module usage counts and free resource */ | 1260 | /* Decrease module usage counts and free resource */ |
1259 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1261 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1260 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1262 | xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) |
1261 | NULL); | 1263 | cleanup_entry(iter, net); |
1264 | |||
1262 | xt_free_table_info(oldinfo); | 1265 | xt_free_table_info(oldinfo); |
1263 | if (copy_to_user(counters_ptr, counters, | 1266 | if (copy_to_user(counters_ptr, counters, |
1264 | sizeof(struct xt_counters) * num_counters) != 0) | 1267 | sizeof(struct xt_counters) * num_counters) != 0) |
@@ -1277,12 +1280,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
1277 | } | 1280 | } |
1278 | 1281 | ||
1279 | static int | 1282 | static int |
1280 | do_replace(struct net *net, void __user *user, unsigned int len) | 1283 | do_replace(struct net *net, const void __user *user, unsigned int len) |
1281 | { | 1284 | { |
1282 | int ret; | 1285 | int ret; |
1283 | struct ipt_replace tmp; | 1286 | struct ipt_replace tmp; |
1284 | struct xt_table_info *newinfo; | 1287 | struct xt_table_info *newinfo; |
1285 | void *loc_cpu_entry; | 1288 | void *loc_cpu_entry; |
1289 | struct ipt_entry *iter; | ||
1286 | 1290 | ||
1287 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1291 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1288 | return -EFAULT; | 1292 | return -EFAULT; |
@@ -1303,9 +1307,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
1303 | goto free_newinfo; | 1307 | goto free_newinfo; |
1304 | } | 1308 | } |
1305 | 1309 | ||
1306 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1310 | ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); |
1307 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, | ||
1308 | tmp.hook_entry, tmp.underflow); | ||
1309 | if (ret != 0) | 1311 | if (ret != 0) |
1310 | goto free_newinfo; | 1312 | goto free_newinfo; |
1311 | 1313 | ||
@@ -1318,27 +1320,16 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
1318 | return 0; | 1320 | return 0; |
1319 | 1321 | ||
1320 | free_newinfo_untrans: | 1322 | free_newinfo_untrans: |
1321 | IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1323 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
1324 | cleanup_entry(iter, net); | ||
1322 | free_newinfo: | 1325 | free_newinfo: |
1323 | xt_free_table_info(newinfo); | 1326 | xt_free_table_info(newinfo); |
1324 | return ret; | 1327 | return ret; |
1325 | } | 1328 | } |
1326 | 1329 | ||
1327 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
1328 | * and everything is OK. */ | ||
1329 | static int | 1330 | static int |
1330 | add_counter_to_entry(struct ipt_entry *e, | 1331 | do_add_counters(struct net *net, const void __user *user, |
1331 | const struct xt_counters addme[], | 1332 | unsigned int len, int compat) |
1332 | unsigned int *i) | ||
1333 | { | ||
1334 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
1335 | |||
1336 | (*i)++; | ||
1337 | return 0; | ||
1338 | } | ||
1339 | |||
1340 | static int | ||
1341 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) | ||
1342 | { | 1333 | { |
1343 | unsigned int i, curcpu; | 1334 | unsigned int i, curcpu; |
1344 | struct xt_counters_info tmp; | 1335 | struct xt_counters_info tmp; |
@@ -1351,6 +1342,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
1351 | const struct xt_table_info *private; | 1342 | const struct xt_table_info *private; |
1352 | int ret = 0; | 1343 | int ret = 0; |
1353 | void *loc_cpu_entry; | 1344 | void *loc_cpu_entry; |
1345 | struct ipt_entry *iter; | ||
1354 | #ifdef CONFIG_COMPAT | 1346 | #ifdef CONFIG_COMPAT |
1355 | struct compat_xt_counters_info compat_tmp; | 1347 | struct compat_xt_counters_info compat_tmp; |
1356 | 1348 | ||
@@ -1408,11 +1400,10 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
1408 | curcpu = smp_processor_id(); | 1400 | curcpu = smp_processor_id(); |
1409 | loc_cpu_entry = private->entries[curcpu]; | 1401 | loc_cpu_entry = private->entries[curcpu]; |
1410 | xt_info_wrlock(curcpu); | 1402 | xt_info_wrlock(curcpu); |
1411 | IPT_ENTRY_ITERATE(loc_cpu_entry, | 1403 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { |
1412 | private->size, | 1404 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); |
1413 | add_counter_to_entry, | 1405 | ++i; |
1414 | paddc, | 1406 | } |
1415 | &i); | ||
1416 | xt_info_wrunlock(curcpu); | 1407 | xt_info_wrunlock(curcpu); |
1417 | unlock_up_free: | 1408 | unlock_up_free: |
1418 | local_bh_enable(); | 1409 | local_bh_enable(); |
@@ -1440,45 +1431,40 @@ struct compat_ipt_replace { | |||
1440 | static int | 1431 | static int |
1441 | compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, | 1432 | compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, |
1442 | unsigned int *size, struct xt_counters *counters, | 1433 | unsigned int *size, struct xt_counters *counters, |
1443 | unsigned int *i) | 1434 | unsigned int i) |
1444 | { | 1435 | { |
1445 | struct ipt_entry_target *t; | 1436 | struct ipt_entry_target *t; |
1446 | struct compat_ipt_entry __user *ce; | 1437 | struct compat_ipt_entry __user *ce; |
1447 | u_int16_t target_offset, next_offset; | 1438 | u_int16_t target_offset, next_offset; |
1448 | compat_uint_t origsize; | 1439 | compat_uint_t origsize; |
1449 | int ret; | 1440 | const struct xt_entry_match *ematch; |
1441 | int ret = 0; | ||
1450 | 1442 | ||
1451 | ret = -EFAULT; | ||
1452 | origsize = *size; | 1443 | origsize = *size; |
1453 | ce = (struct compat_ipt_entry __user *)*dstptr; | 1444 | ce = (struct compat_ipt_entry __user *)*dstptr; |
1454 | if (copy_to_user(ce, e, sizeof(struct ipt_entry))) | 1445 | if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || |
1455 | goto out; | 1446 | copy_to_user(&ce->counters, &counters[i], |
1456 | 1447 | sizeof(counters[i])) != 0) | |
1457 | if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) | 1448 | return -EFAULT; |
1458 | goto out; | ||
1459 | 1449 | ||
1460 | *dstptr += sizeof(struct compat_ipt_entry); | 1450 | *dstptr += sizeof(struct compat_ipt_entry); |
1461 | *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1451 | *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
1462 | 1452 | ||
1463 | ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); | 1453 | xt_ematch_foreach(ematch, e) { |
1454 | ret = xt_compat_match_to_user(ematch, dstptr, size); | ||
1455 | if (ret != 0) | ||
1456 | return ret; | ||
1457 | } | ||
1464 | target_offset = e->target_offset - (origsize - *size); | 1458 | target_offset = e->target_offset - (origsize - *size); |
1465 | if (ret) | ||
1466 | goto out; | ||
1467 | t = ipt_get_target(e); | 1459 | t = ipt_get_target(e); |
1468 | ret = xt_compat_target_to_user(t, dstptr, size); | 1460 | ret = xt_compat_target_to_user(t, dstptr, size); |
1469 | if (ret) | 1461 | if (ret) |
1470 | goto out; | 1462 | return ret; |
1471 | ret = -EFAULT; | ||
1472 | next_offset = e->next_offset - (origsize - *size); | 1463 | next_offset = e->next_offset - (origsize - *size); |
1473 | if (put_user(target_offset, &ce->target_offset)) | 1464 | if (put_user(target_offset, &ce->target_offset) != 0 || |
1474 | goto out; | 1465 | put_user(next_offset, &ce->next_offset) != 0) |
1475 | if (put_user(next_offset, &ce->next_offset)) | 1466 | return -EFAULT; |
1476 | goto out; | ||
1477 | |||
1478 | (*i)++; | ||
1479 | return 0; | 1467 | return 0; |
1480 | out: | ||
1481 | return ret; | ||
1482 | } | 1468 | } |
1483 | 1469 | ||
1484 | static int | 1470 | static int |
@@ -1486,7 +1472,7 @@ compat_find_calc_match(struct ipt_entry_match *m, | |||
1486 | const char *name, | 1472 | const char *name, |
1487 | const struct ipt_ip *ip, | 1473 | const struct ipt_ip *ip, |
1488 | unsigned int hookmask, | 1474 | unsigned int hookmask, |
1489 | int *size, unsigned int *i) | 1475 | int *size) |
1490 | { | 1476 | { |
1491 | struct xt_match *match; | 1477 | struct xt_match *match; |
1492 | 1478 | ||
@@ -1500,47 +1486,32 @@ compat_find_calc_match(struct ipt_entry_match *m, | |||
1500 | } | 1486 | } |
1501 | m->u.kernel.match = match; | 1487 | m->u.kernel.match = match; |
1502 | *size += xt_compat_match_offset(match); | 1488 | *size += xt_compat_match_offset(match); |
1503 | |||
1504 | (*i)++; | ||
1505 | return 0; | ||
1506 | } | ||
1507 | |||
1508 | static int | ||
1509 | compat_release_match(struct ipt_entry_match *m, unsigned int *i) | ||
1510 | { | ||
1511 | if (i && (*i)-- == 0) | ||
1512 | return 1; | ||
1513 | |||
1514 | module_put(m->u.kernel.match->me); | ||
1515 | return 0; | 1489 | return 0; |
1516 | } | 1490 | } |
1517 | 1491 | ||
1518 | static int | 1492 | static void compat_release_entry(struct compat_ipt_entry *e) |
1519 | compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) | ||
1520 | { | 1493 | { |
1521 | struct ipt_entry_target *t; | 1494 | struct ipt_entry_target *t; |
1522 | 1495 | struct xt_entry_match *ematch; | |
1523 | if (i && (*i)-- == 0) | ||
1524 | return 1; | ||
1525 | 1496 | ||
1526 | /* Cleanup all matches */ | 1497 | /* Cleanup all matches */ |
1527 | COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); | 1498 | xt_ematch_foreach(ematch, e) |
1499 | module_put(ematch->u.kernel.match->me); | ||
1528 | t = compat_ipt_get_target(e); | 1500 | t = compat_ipt_get_target(e); |
1529 | module_put(t->u.kernel.target->me); | 1501 | module_put(t->u.kernel.target->me); |
1530 | return 0; | ||
1531 | } | 1502 | } |
1532 | 1503 | ||
1533 | static int | 1504 | static int |
1534 | check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | 1505 | check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, |
1535 | struct xt_table_info *newinfo, | 1506 | struct xt_table_info *newinfo, |
1536 | unsigned int *size, | 1507 | unsigned int *size, |
1537 | unsigned char *base, | 1508 | const unsigned char *base, |
1538 | unsigned char *limit, | 1509 | const unsigned char *limit, |
1539 | unsigned int *hook_entries, | 1510 | const unsigned int *hook_entries, |
1540 | unsigned int *underflows, | 1511 | const unsigned int *underflows, |
1541 | unsigned int *i, | ||
1542 | const char *name) | 1512 | const char *name) |
1543 | { | 1513 | { |
1514 | struct xt_entry_match *ematch; | ||
1544 | struct ipt_entry_target *t; | 1515 | struct ipt_entry_target *t; |
1545 | struct xt_target *target; | 1516 | struct xt_target *target; |
1546 | unsigned int entry_offset; | 1517 | unsigned int entry_offset; |
@@ -1548,8 +1519,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |||
1548 | int ret, off, h; | 1519 | int ret, off, h; |
1549 | 1520 | ||
1550 | duprintf("check_compat_entry_size_and_hooks %p\n", e); | 1521 | duprintf("check_compat_entry_size_and_hooks %p\n", e); |
1551 | if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 | 1522 | if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || |
1552 | || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { | 1523 | (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { |
1553 | duprintf("Bad offset %p, limit = %p\n", e, limit); | 1524 | duprintf("Bad offset %p, limit = %p\n", e, limit); |
1554 | return -EINVAL; | 1525 | return -EINVAL; |
1555 | } | 1526 | } |
@@ -1569,10 +1540,13 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |||
1569 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1540 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
1570 | entry_offset = (void *)e - (void *)base; | 1541 | entry_offset = (void *)e - (void *)base; |
1571 | j = 0; | 1542 | j = 0; |
1572 | ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name, | 1543 | xt_ematch_foreach(ematch, e) { |
1573 | &e->ip, e->comefrom, &off, &j); | 1544 | ret = compat_find_calc_match(ematch, name, |
1574 | if (ret != 0) | 1545 | &e->ip, e->comefrom, &off); |
1575 | goto release_matches; | 1546 | if (ret != 0) |
1547 | goto release_matches; | ||
1548 | ++j; | ||
1549 | } | ||
1576 | 1550 | ||
1577 | t = compat_ipt_get_target(e); | 1551 | t = compat_ipt_get_target(e); |
1578 | target = try_then_request_module(xt_find_target(AF_INET, | 1552 | target = try_then_request_module(xt_find_target(AF_INET, |
@@ -1604,14 +1578,16 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |||
1604 | /* Clear counters and comefrom */ | 1578 | /* Clear counters and comefrom */ |
1605 | memset(&e->counters, 0, sizeof(e->counters)); | 1579 | memset(&e->counters, 0, sizeof(e->counters)); |
1606 | e->comefrom = 0; | 1580 | e->comefrom = 0; |
1607 | |||
1608 | (*i)++; | ||
1609 | return 0; | 1581 | return 0; |
1610 | 1582 | ||
1611 | out: | 1583 | out: |
1612 | module_put(t->u.kernel.target->me); | 1584 | module_put(t->u.kernel.target->me); |
1613 | release_matches: | 1585 | release_matches: |
1614 | IPT_MATCH_ITERATE(e, compat_release_match, &j); | 1586 | xt_ematch_foreach(ematch, e) { |
1587 | if (j-- == 0) | ||
1588 | break; | ||
1589 | module_put(ematch->u.kernel.match->me); | ||
1590 | } | ||
1615 | return ret; | 1591 | return ret; |
1616 | } | 1592 | } |
1617 | 1593 | ||
@@ -1625,6 +1601,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, | |||
1625 | struct ipt_entry *de; | 1601 | struct ipt_entry *de; |
1626 | unsigned int origsize; | 1602 | unsigned int origsize; |
1627 | int ret, h; | 1603 | int ret, h; |
1604 | struct xt_entry_match *ematch; | ||
1628 | 1605 | ||
1629 | ret = 0; | 1606 | ret = 0; |
1630 | origsize = *size; | 1607 | origsize = *size; |
@@ -1635,10 +1612,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, | |||
1635 | *dstptr += sizeof(struct ipt_entry); | 1612 | *dstptr += sizeof(struct ipt_entry); |
1636 | *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1613 | *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
1637 | 1614 | ||
1638 | ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user, | 1615 | xt_ematch_foreach(ematch, e) { |
1639 | dstptr, size); | 1616 | ret = xt_compat_match_from_user(ematch, dstptr, size); |
1640 | if (ret) | 1617 | if (ret != 0) |
1641 | return ret; | 1618 | return ret; |
1619 | } | ||
1642 | de->target_offset = e->target_offset - (origsize - *size); | 1620 | de->target_offset = e->target_offset - (origsize - *size); |
1643 | t = compat_ipt_get_target(e); | 1621 | t = compat_ipt_get_target(e); |
1644 | target = t->u.kernel.target; | 1622 | target = t->u.kernel.target; |
@@ -1655,36 +1633,43 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, | |||
1655 | } | 1633 | } |
1656 | 1634 | ||
1657 | static int | 1635 | static int |
1658 | compat_check_entry(struct ipt_entry *e, const char *name, | 1636 | compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) |
1659 | unsigned int *i) | ||
1660 | { | 1637 | { |
1638 | struct xt_entry_match *ematch; | ||
1661 | struct xt_mtchk_param mtpar; | 1639 | struct xt_mtchk_param mtpar; |
1662 | unsigned int j; | 1640 | unsigned int j; |
1663 | int ret; | 1641 | int ret = 0; |
1664 | 1642 | ||
1665 | j = 0; | 1643 | j = 0; |
1644 | mtpar.net = net; | ||
1666 | mtpar.table = name; | 1645 | mtpar.table = name; |
1667 | mtpar.entryinfo = &e->ip; | 1646 | mtpar.entryinfo = &e->ip; |
1668 | mtpar.hook_mask = e->comefrom; | 1647 | mtpar.hook_mask = e->comefrom; |
1669 | mtpar.family = NFPROTO_IPV4; | 1648 | mtpar.family = NFPROTO_IPV4; |
1670 | ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); | 1649 | xt_ematch_foreach(ematch, e) { |
1671 | if (ret) | 1650 | ret = check_match(ematch, &mtpar); |
1672 | goto cleanup_matches; | 1651 | if (ret != 0) |
1652 | goto cleanup_matches; | ||
1653 | ++j; | ||
1654 | } | ||
1673 | 1655 | ||
1674 | ret = check_target(e, name); | 1656 | ret = check_target(e, net, name); |
1675 | if (ret) | 1657 | if (ret) |
1676 | goto cleanup_matches; | 1658 | goto cleanup_matches; |
1677 | |||
1678 | (*i)++; | ||
1679 | return 0; | 1659 | return 0; |
1680 | 1660 | ||
1681 | cleanup_matches: | 1661 | cleanup_matches: |
1682 | IPT_MATCH_ITERATE(e, cleanup_match, &j); | 1662 | xt_ematch_foreach(ematch, e) { |
1663 | if (j-- == 0) | ||
1664 | break; | ||
1665 | cleanup_match(ematch, net); | ||
1666 | } | ||
1683 | return ret; | 1667 | return ret; |
1684 | } | 1668 | } |
1685 | 1669 | ||
1686 | static int | 1670 | static int |
1687 | translate_compat_table(const char *name, | 1671 | translate_compat_table(struct net *net, |
1672 | const char *name, | ||
1688 | unsigned int valid_hooks, | 1673 | unsigned int valid_hooks, |
1689 | struct xt_table_info **pinfo, | 1674 | struct xt_table_info **pinfo, |
1690 | void **pentry0, | 1675 | void **pentry0, |
@@ -1696,6 +1681,8 @@ translate_compat_table(const char *name, | |||
1696 | unsigned int i, j; | 1681 | unsigned int i, j; |
1697 | struct xt_table_info *newinfo, *info; | 1682 | struct xt_table_info *newinfo, *info; |
1698 | void *pos, *entry0, *entry1; | 1683 | void *pos, *entry0, *entry1; |
1684 | struct compat_ipt_entry *iter0; | ||
1685 | struct ipt_entry *iter1; | ||
1699 | unsigned int size; | 1686 | unsigned int size; |
1700 | int ret; | 1687 | int ret; |
1701 | 1688 | ||
@@ -1714,13 +1701,17 @@ translate_compat_table(const char *name, | |||
1714 | j = 0; | 1701 | j = 0; |
1715 | xt_compat_lock(AF_INET); | 1702 | xt_compat_lock(AF_INET); |
1716 | /* Walk through entries, checking offsets. */ | 1703 | /* Walk through entries, checking offsets. */ |
1717 | ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, | 1704 | xt_entry_foreach(iter0, entry0, total_size) { |
1718 | check_compat_entry_size_and_hooks, | 1705 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
1719 | info, &size, entry0, | 1706 | entry0, |
1720 | entry0 + total_size, | 1707 | entry0 + total_size, |
1721 | hook_entries, underflows, &j, name); | 1708 | hook_entries, |
1722 | if (ret != 0) | 1709 | underflows, |
1723 | goto out_unlock; | 1710 | name); |
1711 | if (ret != 0) | ||
1712 | goto out_unlock; | ||
1713 | ++j; | ||
1714 | } | ||
1724 | 1715 | ||
1725 | ret = -EINVAL; | 1716 | ret = -EINVAL; |
1726 | if (j != number) { | 1717 | if (j != number) { |
@@ -1759,9 +1750,12 @@ translate_compat_table(const char *name, | |||
1759 | entry1 = newinfo->entries[raw_smp_processor_id()]; | 1750 | entry1 = newinfo->entries[raw_smp_processor_id()]; |
1760 | pos = entry1; | 1751 | pos = entry1; |
1761 | size = total_size; | 1752 | size = total_size; |
1762 | ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, | 1753 | xt_entry_foreach(iter0, entry0, total_size) { |
1763 | compat_copy_entry_from_user, | 1754 | ret = compat_copy_entry_from_user(iter0, &pos, &size, |
1764 | &pos, &size, name, newinfo, entry1); | 1755 | name, newinfo, entry1); |
1756 | if (ret != 0) | ||
1757 | break; | ||
1758 | } | ||
1765 | xt_compat_flush_offsets(AF_INET); | 1759 | xt_compat_flush_offsets(AF_INET); |
1766 | xt_compat_unlock(AF_INET); | 1760 | xt_compat_unlock(AF_INET); |
1767 | if (ret) | 1761 | if (ret) |
@@ -1772,13 +1766,32 @@ translate_compat_table(const char *name, | |||
1772 | goto free_newinfo; | 1766 | goto free_newinfo; |
1773 | 1767 | ||
1774 | i = 0; | 1768 | i = 0; |
1775 | ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, | 1769 | xt_entry_foreach(iter1, entry1, newinfo->size) { |
1776 | name, &i); | 1770 | ret = compat_check_entry(iter1, net, name); |
1771 | if (ret != 0) | ||
1772 | break; | ||
1773 | ++i; | ||
1774 | } | ||
1777 | if (ret) { | 1775 | if (ret) { |
1776 | /* | ||
1777 | * The first i matches need cleanup_entry (calls ->destroy) | ||
1778 | * because they had called ->check already. The other j-i | ||
1779 | * entries need only release. | ||
1780 | */ | ||
1781 | int skip = i; | ||
1778 | j -= i; | 1782 | j -= i; |
1779 | COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, | 1783 | xt_entry_foreach(iter0, entry0, newinfo->size) { |
1780 | compat_release_entry, &j); | 1784 | if (skip-- > 0) |
1781 | IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); | 1785 | continue; |
1786 | if (j-- == 0) | ||
1787 | break; | ||
1788 | compat_release_entry(iter0); | ||
1789 | } | ||
1790 | xt_entry_foreach(iter1, entry1, newinfo->size) { | ||
1791 | if (i-- == 0) | ||
1792 | break; | ||
1793 | cleanup_entry(iter1, net); | ||
1794 | } | ||
1782 | xt_free_table_info(newinfo); | 1795 | xt_free_table_info(newinfo); |
1783 | return ret; | 1796 | return ret; |
1784 | } | 1797 | } |
@@ -1796,7 +1809,11 @@ translate_compat_table(const char *name, | |||
1796 | free_newinfo: | 1809 | free_newinfo: |
1797 | xt_free_table_info(newinfo); | 1810 | xt_free_table_info(newinfo); |
1798 | out: | 1811 | out: |
1799 | COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); | 1812 | xt_entry_foreach(iter0, entry0, total_size) { |
1813 | if (j-- == 0) | ||
1814 | break; | ||
1815 | compat_release_entry(iter0); | ||
1816 | } | ||
1800 | return ret; | 1817 | return ret; |
1801 | out_unlock: | 1818 | out_unlock: |
1802 | xt_compat_flush_offsets(AF_INET); | 1819 | xt_compat_flush_offsets(AF_INET); |
@@ -1811,6 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
1811 | struct compat_ipt_replace tmp; | 1828 | struct compat_ipt_replace tmp; |
1812 | struct xt_table_info *newinfo; | 1829 | struct xt_table_info *newinfo; |
1813 | void *loc_cpu_entry; | 1830 | void *loc_cpu_entry; |
1831 | struct ipt_entry *iter; | ||
1814 | 1832 | ||
1815 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1833 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1816 | return -EFAULT; | 1834 | return -EFAULT; |
@@ -1833,7 +1851,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
1833 | goto free_newinfo; | 1851 | goto free_newinfo; |
1834 | } | 1852 | } |
1835 | 1853 | ||
1836 | ret = translate_compat_table(tmp.name, tmp.valid_hooks, | 1854 | ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, |
1837 | &newinfo, &loc_cpu_entry, tmp.size, | 1855 | &newinfo, &loc_cpu_entry, tmp.size, |
1838 | tmp.num_entries, tmp.hook_entry, | 1856 | tmp.num_entries, tmp.hook_entry, |
1839 | tmp.underflow); | 1857 | tmp.underflow); |
@@ -1849,7 +1867,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
1849 | return 0; | 1867 | return 0; |
1850 | 1868 | ||
1851 | free_newinfo_untrans: | 1869 | free_newinfo_untrans: |
1852 | IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1870 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
1871 | cleanup_entry(iter, net); | ||
1853 | free_newinfo: | 1872 | free_newinfo: |
1854 | xt_free_table_info(newinfo); | 1873 | xt_free_table_info(newinfo); |
1855 | return ret; | 1874 | return ret; |
@@ -1898,6 +1917,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, | |||
1898 | int ret = 0; | 1917 | int ret = 0; |
1899 | const void *loc_cpu_entry; | 1918 | const void *loc_cpu_entry; |
1900 | unsigned int i = 0; | 1919 | unsigned int i = 0; |
1920 | struct ipt_entry *iter; | ||
1901 | 1921 | ||
1902 | counters = alloc_counters(table); | 1922 | counters = alloc_counters(table); |
1903 | if (IS_ERR(counters)) | 1923 | if (IS_ERR(counters)) |
@@ -1910,9 +1930,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, | |||
1910 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1930 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
1911 | pos = userptr; | 1931 | pos = userptr; |
1912 | size = total_size; | 1932 | size = total_size; |
1913 | ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, | 1933 | xt_entry_foreach(iter, loc_cpu_entry, total_size) { |
1914 | compat_copy_entry_to_user, | 1934 | ret = compat_copy_entry_to_user(iter, &pos, |
1915 | &pos, &size, counters, &i); | 1935 | &size, counters, i++); |
1936 | if (ret != 0) | ||
1937 | break; | ||
1938 | } | ||
1916 | 1939 | ||
1917 | vfree(counters); | 1940 | vfree(counters); |
1918 | return ret; | 1941 | return ret; |
@@ -2086,11 +2109,7 @@ struct xt_table *ipt_register_table(struct net *net, | |||
2086 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | 2109 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; |
2087 | memcpy(loc_cpu_entry, repl->entries, repl->size); | 2110 | memcpy(loc_cpu_entry, repl->entries, repl->size); |
2088 | 2111 | ||
2089 | ret = translate_table(table->name, table->valid_hooks, | 2112 | ret = translate_table(net, newinfo, loc_cpu_entry, repl); |
2090 | newinfo, loc_cpu_entry, repl->size, | ||
2091 | repl->num_entries, | ||
2092 | repl->hook_entry, | ||
2093 | repl->underflow); | ||
2094 | if (ret != 0) | 2113 | if (ret != 0) |
2095 | goto out_free; | 2114 | goto out_free; |
2096 | 2115 | ||
@@ -2108,17 +2127,19 @@ out: | |||
2108 | return ERR_PTR(ret); | 2127 | return ERR_PTR(ret); |
2109 | } | 2128 | } |
2110 | 2129 | ||
2111 | void ipt_unregister_table(struct xt_table *table) | 2130 | void ipt_unregister_table(struct net *net, struct xt_table *table) |
2112 | { | 2131 | { |
2113 | struct xt_table_info *private; | 2132 | struct xt_table_info *private; |
2114 | void *loc_cpu_entry; | 2133 | void *loc_cpu_entry; |
2115 | struct module *table_owner = table->me; | 2134 | struct module *table_owner = table->me; |
2135 | struct ipt_entry *iter; | ||
2116 | 2136 | ||
2117 | private = xt_unregister_table(table); | 2137 | private = xt_unregister_table(table); |
2118 | 2138 | ||
2119 | /* Decrease module usage counts and free resources */ | 2139 | /* Decrease module usage counts and free resources */ |
2120 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 2140 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
2121 | IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); | 2141 | xt_entry_foreach(iter, loc_cpu_entry, private->size) |
2142 | cleanup_entry(iter, net); | ||
2122 | if (private->number > private->initial_entries) | 2143 | if (private->number > private->initial_entries) |
2123 | module_put(table_owner); | 2144 | module_put(table_owner); |
2124 | xt_free_table_info(private); | 2145 | xt_free_table_info(private); |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2e4f98b85524..ab828400ed71 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/jhash.h> | 14 | #include <linux/jhash.h> |
15 | #include <linux/bitops.h> | 15 | #include <linux/bitops.h> |
16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
17 | #include <linux/slab.h> | ||
17 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
18 | #include <linux/tcp.h> | 19 | #include <linux/tcp.h> |
19 | #include <linux/udp.h> | 20 | #include <linux/udp.h> |
@@ -303,9 +304,9 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
303 | 304 | ||
304 | /* special case: ICMP error handling. conntrack distinguishes between | 305 | /* special case: ICMP error handling. conntrack distinguishes between |
305 | * error messages (RELATED) and information requests (see below) */ | 306 | * error messages (RELATED) and information requests (see below) */ |
306 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP | 307 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP && |
307 | && (ctinfo == IP_CT_RELATED | 308 | (ctinfo == IP_CT_RELATED || |
308 | || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) | 309 | ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) |
309 | return XT_CONTINUE; | 310 | return XT_CONTINUE; |
310 | 311 | ||
311 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, | 312 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, |
@@ -362,8 +363,8 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) | |||
362 | return false; | 363 | return false; |
363 | 364 | ||
364 | } | 365 | } |
365 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) | 366 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) || |
366 | || e->ip.dst.s_addr == 0) { | 367 | e->ip.dst.s_addr == 0) { |
367 | printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); | 368 | printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); |
368 | return false; | 369 | return false; |
369 | } | 370 | } |
@@ -495,14 +496,14 @@ arp_mangle(unsigned int hook, | |||
495 | struct clusterip_config *c; | 496 | struct clusterip_config *c; |
496 | 497 | ||
497 | /* we don't care about non-ethernet and non-ipv4 ARP */ | 498 | /* we don't care about non-ethernet and non-ipv4 ARP */ |
498 | if (arp->ar_hrd != htons(ARPHRD_ETHER) | 499 | if (arp->ar_hrd != htons(ARPHRD_ETHER) || |
499 | || arp->ar_pro != htons(ETH_P_IP) | 500 | arp->ar_pro != htons(ETH_P_IP) || |
500 | || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | 501 | arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) |
501 | return NF_ACCEPT; | 502 | return NF_ACCEPT; |
502 | 503 | ||
503 | /* we only want to mangle arp requests and replies */ | 504 | /* we only want to mangle arp requests and replies */ |
504 | if (arp->ar_op != htons(ARPOP_REPLY) | 505 | if (arp->ar_op != htons(ARPOP_REPLY) && |
505 | && arp->ar_op != htons(ARPOP_REQUEST)) | 506 | arp->ar_op != htons(ARPOP_REQUEST)) |
506 | return NF_ACCEPT; | 507 | return NF_ACCEPT; |
507 | 508 | ||
508 | payload = (void *)(arp+1); | 509 | payload = (void *)(arp+1); |
@@ -560,8 +561,7 @@ struct clusterip_seq_position { | |||
560 | 561 | ||
561 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | 562 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
562 | { | 563 | { |
563 | const struct proc_dir_entry *pde = s->private; | 564 | struct clusterip_config *c = s->private; |
564 | struct clusterip_config *c = pde->data; | ||
565 | unsigned int weight; | 565 | unsigned int weight; |
566 | u_int32_t local_nodes; | 566 | u_int32_t local_nodes; |
567 | struct clusterip_seq_position *idx; | 567 | struct clusterip_seq_position *idx; |
@@ -632,10 +632,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) | |||
632 | 632 | ||
633 | if (!ret) { | 633 | if (!ret) { |
634 | struct seq_file *sf = file->private_data; | 634 | struct seq_file *sf = file->private_data; |
635 | struct proc_dir_entry *pde = PDE(inode); | 635 | struct clusterip_config *c = PDE(inode)->data; |
636 | struct clusterip_config *c = pde->data; | ||
637 | 636 | ||
638 | sf->private = pde; | 637 | sf->private = c; |
639 | 638 | ||
640 | clusterip_config_get(c); | 639 | clusterip_config_get(c); |
641 | } | 640 | } |
@@ -645,8 +644,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) | |||
645 | 644 | ||
646 | static int clusterip_proc_release(struct inode *inode, struct file *file) | 645 | static int clusterip_proc_release(struct inode *inode, struct file *file) |
647 | { | 646 | { |
648 | struct proc_dir_entry *pde = PDE(inode); | 647 | struct clusterip_config *c = PDE(inode)->data; |
649 | struct clusterip_config *c = pde->data; | ||
650 | int ret; | 648 | int ret; |
651 | 649 | ||
652 | ret = seq_release(inode, file); | 650 | ret = seq_release(inode, file); |
@@ -660,10 +658,9 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) | |||
660 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | 658 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, |
661 | size_t size, loff_t *ofs) | 659 | size_t size, loff_t *ofs) |
662 | { | 660 | { |
661 | struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; | ||
663 | #define PROC_WRITELEN 10 | 662 | #define PROC_WRITELEN 10 |
664 | char buffer[PROC_WRITELEN+1]; | 663 | char buffer[PROC_WRITELEN+1]; |
665 | const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
666 | struct clusterip_config *c = pde->data; | ||
667 | unsigned long nodenum; | 664 | unsigned long nodenum; |
668 | 665 | ||
669 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | 666 | if (copy_from_user(buffer, input, PROC_WRITELEN)) |
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f7e2fa0974dc..ea5cea2415c1 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
@@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) | |||
50 | struct tcphdr _tcph, *tcph; | 50 | struct tcphdr _tcph, *tcph; |
51 | __be16 oldval; | 51 | __be16 oldval; |
52 | 52 | ||
53 | /* Not enought header? */ | 53 | /* Not enough header? */ |
54 | tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | 54 | tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); |
55 | if (!tcph) | 55 | if (!tcph) |
56 | return false; | 56 | return false; |
@@ -85,8 +85,8 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
85 | if (!set_ect_ip(skb, einfo)) | 85 | if (!set_ect_ip(skb, einfo)) |
86 | return NF_DROP; | 86 | return NF_DROP; |
87 | 87 | ||
88 | if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) | 88 | if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && |
89 | && ip_hdr(skb)->protocol == IPPROTO_TCP) | 89 | ip_hdr(skb)->protocol == IPPROTO_TCP) |
90 | if (!set_ect_tcp(skb, einfo)) | 90 | if (!set_ect_tcp(skb, einfo)) |
91 | return NF_DROP; | 91 | return NF_DROP; |
92 | 92 | ||
@@ -108,8 +108,8 @@ static bool ecn_tg_check(const struct xt_tgchk_param *par) | |||
108 | einfo->ip_ect); | 108 | einfo->ip_ect); |
109 | return false; | 109 | return false; |
110 | } | 110 | } |
111 | if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) | 111 | if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && |
112 | && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { | 112 | (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { |
113 | printk(KERN_WARNING "ECN: cannot use TCP operations on a " | 113 | printk(KERN_WARNING "ECN: cannot use TCP operations on a " |
114 | "non-tcp rule\n"); | 114 | "non-tcp rule\n"); |
115 | return false; | 115 | return false; |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index acc44c69eb68..ee128efa1c8d 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -74,8 +74,8 @@ static void dump_packet(const struct nf_loginfo *info, | |||
74 | if (ntohs(ih->frag_off) & IP_OFFSET) | 74 | if (ntohs(ih->frag_off) & IP_OFFSET) |
75 | printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); | 75 | printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); |
76 | 76 | ||
77 | if ((logflags & IPT_LOG_IPOPT) | 77 | if ((logflags & IPT_LOG_IPOPT) && |
78 | && ih->ihl * 4 > sizeof(struct iphdr)) { | 78 | ih->ihl * 4 > sizeof(struct iphdr)) { |
79 | const unsigned char *op; | 79 | const unsigned char *op; |
80 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; | 80 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; |
81 | unsigned int i, optsize; | 81 | unsigned int i, optsize; |
@@ -146,8 +146,8 @@ static void dump_packet(const struct nf_loginfo *info, | |||
146 | /* Max length: 11 "URGP=65535 " */ | 146 | /* Max length: 11 "URGP=65535 " */ |
147 | printk("URGP=%u ", ntohs(th->urg_ptr)); | 147 | printk("URGP=%u ", ntohs(th->urg_ptr)); |
148 | 148 | ||
149 | if ((logflags & IPT_LOG_TCPOPT) | 149 | if ((logflags & IPT_LOG_TCPOPT) && |
150 | && th->doff * 4 > sizeof(struct tcphdr)) { | 150 | th->doff * 4 > sizeof(struct tcphdr)) { |
151 | unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; | 151 | unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; |
152 | const unsigned char *op; | 152 | const unsigned char *op; |
153 | unsigned int i, optsize; | 153 | unsigned int i, optsize; |
@@ -238,9 +238,9 @@ static void dump_packet(const struct nf_loginfo *info, | |||
238 | printk("TYPE=%u CODE=%u ", ich->type, ich->code); | 238 | printk("TYPE=%u CODE=%u ", ich->type, ich->code); |
239 | 239 | ||
240 | /* Max length: 25 "INCOMPLETE [65535 bytes] " */ | 240 | /* Max length: 25 "INCOMPLETE [65535 bytes] " */ |
241 | if (ich->type <= NR_ICMP_TYPES | 241 | if (ich->type <= NR_ICMP_TYPES && |
242 | && required_len[ich->type] | 242 | required_len[ich->type] && |
243 | && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { | 243 | skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { |
244 | printk("INCOMPLETE [%u bytes] ", | 244 | printk("INCOMPLETE [%u bytes] ", |
245 | skb->len - iphoff - ih->ihl*4); | 245 | skb->len - iphoff - ih->ihl*4); |
246 | break; | 246 | break; |
@@ -276,8 +276,8 @@ static void dump_packet(const struct nf_loginfo *info, | |||
276 | } | 276 | } |
277 | 277 | ||
278 | /* Max length: 10 "MTU=65535 " */ | 278 | /* Max length: 10 "MTU=65535 " */ |
279 | if (ich->type == ICMP_DEST_UNREACH | 279 | if (ich->type == ICMP_DEST_UNREACH && |
280 | && ich->code == ICMP_FRAG_NEEDED) | 280 | ich->code == ICMP_FRAG_NEEDED) |
281 | printk("MTU=%u ", ntohs(ich->un.frag.mtu)); | 281 | printk("MTU=%u ", ntohs(ich->un.frag.mtu)); |
282 | } | 282 | } |
283 | break; | 283 | break; |
@@ -407,8 +407,8 @@ ipt_log_packet(u_int8_t pf, | |||
407 | if (in && !out) { | 407 | if (in && !out) { |
408 | /* MAC logging for input chain only. */ | 408 | /* MAC logging for input chain only. */ |
409 | printk("MAC="); | 409 | printk("MAC="); |
410 | if (skb->dev && skb->dev->hard_header_len | 410 | if (skb->dev && skb->dev->hard_header_len && |
411 | && skb->mac_header != skb->network_header) { | 411 | skb->mac_header != skb->network_header) { |
412 | int i; | 412 | int i; |
413 | const unsigned char *p = skb_mac_header(skb); | 413 | const unsigned char *p = skb_mac_header(skb); |
414 | for (i = 0; i < skb->dev->hard_header_len; i++,p++) | 414 | for (i = 0; i < skb->dev->hard_header_len; i++,p++) |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index dada0863946d..650b54042b01 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -59,8 +59,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
59 | ct = nf_ct_get(skb, &ctinfo); | 59 | ct = nf_ct_get(skb, &ctinfo); |
60 | nat = nfct_nat(ct); | 60 | nat = nfct_nat(ct); |
61 | 61 | ||
62 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED | 62 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || |
63 | || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); | 63 | ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); |
64 | 64 | ||
65 | /* Source address is 0.0.0.0 - locally generated packet that is | 65 | /* Source address is 0.0.0.0 - locally generated packet that is |
66 | * probably not supposed to be masqueraded. | 66 | * probably not supposed to be masqueraded. |
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index c93ae44bff2a..a0e8bcf04159 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
15 | #include <linux/slab.h> | ||
15 | #include <linux/ip.h> | 16 | #include <linux/ip.h> |
16 | #include <linux/udp.h> | 17 | #include <linux/udp.h> |
17 | #include <linux/icmp.h> | 18 | #include <linux/icmp.h> |
@@ -184,8 +185,8 @@ static bool reject_tg_check(const struct xt_tgchk_param *par) | |||
184 | return false; | 185 | return false; |
185 | } else if (rejinfo->with == IPT_TCP_RESET) { | 186 | } else if (rejinfo->with == IPT_TCP_RESET) { |
186 | /* Must specify that it's a TCP packet */ | 187 | /* Must specify that it's a TCP packet */ |
187 | if (e->ip.proto != IPPROTO_TCP | 188 | if (e->ip.proto != IPPROTO_TCP || |
188 | || (e->ip.invflags & XT_INV_PROTO)) { | 189 | (e->ip.invflags & XT_INV_PROTO)) { |
189 | printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); | 190 | printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); |
190 | return false; | 191 | return false; |
191 | } | 192 | } |
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d32cc4bb328a..0dbe697f164f 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
35 | #include <linux/socket.h> | 35 | #include <linux/socket.h> |
36 | #include <linux/slab.h> | ||
36 | #include <linux/skbuff.h> | 37 | #include <linux/skbuff.h> |
37 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
38 | #include <linux/timer.h> | 39 | #include <linux/timer.h> |
@@ -226,9 +227,9 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
226 | else | 227 | else |
227 | *(pm->prefix) = '\0'; | 228 | *(pm->prefix) = '\0'; |
228 | 229 | ||
229 | if (in && in->hard_header_len > 0 | 230 | if (in && in->hard_header_len > 0 && |
230 | && skb->mac_header != skb->network_header | 231 | skb->mac_header != skb->network_header && |
231 | && in->hard_header_len <= ULOG_MAC_LEN) { | 232 | in->hard_header_len <= ULOG_MAC_LEN) { |
232 | memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); | 233 | memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); |
233 | pm->mac_len = in->hard_header_len; | 234 | pm->mac_len = in->hard_header_len; |
234 | } else | 235 | } else |
@@ -338,7 +339,7 @@ struct compat_ipt_ulog_info { | |||
338 | char prefix[ULOG_PREFIX_LEN]; | 339 | char prefix[ULOG_PREFIX_LEN]; |
339 | }; | 340 | }; |
340 | 341 | ||
341 | static void ulog_tg_compat_from_user(void *dst, void *src) | 342 | static void ulog_tg_compat_from_user(void *dst, const void *src) |
342 | { | 343 | { |
343 | const struct compat_ipt_ulog_info *cl = src; | 344 | const struct compat_ipt_ulog_info *cl = src; |
344 | struct ipt_ulog_info l = { | 345 | struct ipt_ulog_info l = { |
@@ -351,7 +352,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src) | |||
351 | memcpy(dst, &l, sizeof(l)); | 352 | memcpy(dst, &l, sizeof(l)); |
352 | } | 353 | } |
353 | 354 | ||
354 | static int ulog_tg_compat_to_user(void __user *dst, void *src) | 355 | static int ulog_tg_compat_to_user(void __user *dst, const void *src) |
355 | { | 356 | { |
356 | const struct ipt_ulog_info *l = src; | 357 | const struct ipt_ulog_info *l = src; |
357 | struct compat_ipt_ulog_info cl = { | 358 | struct compat_ipt_ulog_info cl = { |
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 6289b64144c6..2a1e56b71908 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c | |||
@@ -96,8 +96,8 @@ static bool ecn_mt_check(const struct xt_mtchk_param *par) | |||
96 | if (info->invert & IPT_ECN_OP_MATCH_MASK) | 96 | if (info->invert & IPT_ECN_OP_MATCH_MASK) |
97 | return false; | 97 | return false; |
98 | 98 | ||
99 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) | 99 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && |
100 | && ip->proto != IPPROTO_TCP) { | 100 | ip->proto != IPPROTO_TCP) { |
101 | printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" | 101 | printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" |
102 | " non-tcp packets\n"); | 102 | " non-tcp packets\n"); |
103 | return false; | 103 | return false; |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd68e5..55392466daa4 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/netfilter_ipv4/ip_tables.h> | 15 | #include <linux/netfilter_ipv4/ip_tables.h> |
16 | #include <linux/slab.h> | ||
16 | #include <net/ip.h> | 17 | #include <net/ip.h> |
17 | 18 | ||
18 | MODULE_LICENSE("GPL"); | 19 | MODULE_LICENSE("GPL"); |
@@ -23,104 +24,32 @@ MODULE_DESCRIPTION("iptables filter table"); | |||
23 | (1 << NF_INET_FORWARD) | \ | 24 | (1 << NF_INET_FORWARD) | \ |
24 | (1 << NF_INET_LOCAL_OUT)) | 25 | (1 << NF_INET_LOCAL_OUT)) |
25 | 26 | ||
26 | static struct | ||
27 | { | ||
28 | struct ipt_replace repl; | ||
29 | struct ipt_standard entries[3]; | ||
30 | struct ipt_error term; | ||
31 | } initial_table __net_initdata = { | ||
32 | .repl = { | ||
33 | .name = "filter", | ||
34 | .valid_hooks = FILTER_VALID_HOOKS, | ||
35 | .num_entries = 4, | ||
36 | .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
37 | .hook_entry = { | ||
38 | [NF_INET_LOCAL_IN] = 0, | ||
39 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
40 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
41 | }, | ||
42 | .underflow = { | ||
43 | [NF_INET_LOCAL_IN] = 0, | ||
44 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
45 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
46 | }, | ||
47 | }, | ||
48 | .entries = { | ||
49 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ | ||
50 | IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ | ||
51 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
52 | }, | ||
53 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
54 | }; | ||
55 | |||
56 | static const struct xt_table packet_filter = { | 27 | static const struct xt_table packet_filter = { |
57 | .name = "filter", | 28 | .name = "filter", |
58 | .valid_hooks = FILTER_VALID_HOOKS, | 29 | .valid_hooks = FILTER_VALID_HOOKS, |
59 | .me = THIS_MODULE, | 30 | .me = THIS_MODULE, |
60 | .af = NFPROTO_IPV4, | 31 | .af = NFPROTO_IPV4, |
32 | .priority = NF_IP_PRI_FILTER, | ||
61 | }; | 33 | }; |
62 | 34 | ||
63 | /* The work comes in here from netfilter.c. */ | ||
64 | static unsigned int | ||
65 | ipt_local_in_hook(unsigned int hook, | ||
66 | struct sk_buff *skb, | ||
67 | const struct net_device *in, | ||
68 | const struct net_device *out, | ||
69 | int (*okfn)(struct sk_buff *)) | ||
70 | { | ||
71 | return ipt_do_table(skb, hook, in, out, | ||
72 | dev_net(in)->ipv4.iptable_filter); | ||
73 | } | ||
74 | |||
75 | static unsigned int | 35 | static unsigned int |
76 | ipt_hook(unsigned int hook, | 36 | iptable_filter_hook(unsigned int hook, struct sk_buff *skb, |
77 | struct sk_buff *skb, | 37 | const struct net_device *in, const struct net_device *out, |
78 | const struct net_device *in, | 38 | int (*okfn)(struct sk_buff *)) |
79 | const struct net_device *out, | ||
80 | int (*okfn)(struct sk_buff *)) | ||
81 | { | 39 | { |
82 | return ipt_do_table(skb, hook, in, out, | 40 | const struct net *net; |
83 | dev_net(in)->ipv4.iptable_filter); | ||
84 | } | ||
85 | 41 | ||
86 | static unsigned int | 42 | if (hook == NF_INET_LOCAL_OUT && |
87 | ipt_local_out_hook(unsigned int hook, | 43 | (skb->len < sizeof(struct iphdr) || |
88 | struct sk_buff *skb, | 44 | ip_hdrlen(skb) < sizeof(struct iphdr))) |
89 | const struct net_device *in, | 45 | /* root is playing with raw sockets. */ |
90 | const struct net_device *out, | ||
91 | int (*okfn)(struct sk_buff *)) | ||
92 | { | ||
93 | /* root is playing with raw sockets. */ | ||
94 | if (skb->len < sizeof(struct iphdr) || | ||
95 | ip_hdrlen(skb) < sizeof(struct iphdr)) | ||
96 | return NF_ACCEPT; | 46 | return NF_ACCEPT; |
97 | return ipt_do_table(skb, hook, in, out, | 47 | |
98 | dev_net(out)->ipv4.iptable_filter); | 48 | net = dev_net((in != NULL) ? in : out); |
49 | return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); | ||
99 | } | 50 | } |
100 | 51 | ||
101 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 52 | static struct nf_hook_ops *filter_ops __read_mostly; |
102 | { | ||
103 | .hook = ipt_local_in_hook, | ||
104 | .owner = THIS_MODULE, | ||
105 | .pf = NFPROTO_IPV4, | ||
106 | .hooknum = NF_INET_LOCAL_IN, | ||
107 | .priority = NF_IP_PRI_FILTER, | ||
108 | }, | ||
109 | { | ||
110 | .hook = ipt_hook, | ||
111 | .owner = THIS_MODULE, | ||
112 | .pf = NFPROTO_IPV4, | ||
113 | .hooknum = NF_INET_FORWARD, | ||
114 | .priority = NF_IP_PRI_FILTER, | ||
115 | }, | ||
116 | { | ||
117 | .hook = ipt_local_out_hook, | ||
118 | .owner = THIS_MODULE, | ||
119 | .pf = NFPROTO_IPV4, | ||
120 | .hooknum = NF_INET_LOCAL_OUT, | ||
121 | .priority = NF_IP_PRI_FILTER, | ||
122 | }, | ||
123 | }; | ||
124 | 53 | ||
125 | /* Default to forward because I got too much mail already. */ | 54 | /* Default to forward because I got too much mail already. */ |
126 | static int forward = NF_ACCEPT; | 55 | static int forward = NF_ACCEPT; |
@@ -128,9 +57,18 @@ module_param(forward, bool, 0000); | |||
128 | 57 | ||
129 | static int __net_init iptable_filter_net_init(struct net *net) | 58 | static int __net_init iptable_filter_net_init(struct net *net) |
130 | { | 59 | { |
131 | /* Register table */ | 60 | struct ipt_replace *repl; |
61 | |||
62 | repl = ipt_alloc_initial_table(&packet_filter); | ||
63 | if (repl == NULL) | ||
64 | return -ENOMEM; | ||
65 | /* Entry 1 is the FORWARD hook */ | ||
66 | ((struct ipt_standard *)repl->entries)[1].target.verdict = | ||
67 | -forward - 1; | ||
68 | |||
132 | net->ipv4.iptable_filter = | 69 | net->ipv4.iptable_filter = |
133 | ipt_register_table(net, &packet_filter, &initial_table.repl); | 70 | ipt_register_table(net, &packet_filter, repl); |
71 | kfree(repl); | ||
134 | if (IS_ERR(net->ipv4.iptable_filter)) | 72 | if (IS_ERR(net->ipv4.iptable_filter)) |
135 | return PTR_ERR(net->ipv4.iptable_filter); | 73 | return PTR_ERR(net->ipv4.iptable_filter); |
136 | return 0; | 74 | return 0; |
@@ -138,7 +76,7 @@ static int __net_init iptable_filter_net_init(struct net *net) | |||
138 | 76 | ||
139 | static void __net_exit iptable_filter_net_exit(struct net *net) | 77 | static void __net_exit iptable_filter_net_exit(struct net *net) |
140 | { | 78 | { |
141 | ipt_unregister_table(net->ipv4.iptable_filter); | 79 | ipt_unregister_table(net, net->ipv4.iptable_filter); |
142 | } | 80 | } |
143 | 81 | ||
144 | static struct pernet_operations iptable_filter_net_ops = { | 82 | static struct pernet_operations iptable_filter_net_ops = { |
@@ -155,17 +93,16 @@ static int __init iptable_filter_init(void) | |||
155 | return -EINVAL; | 93 | return -EINVAL; |
156 | } | 94 | } |
157 | 95 | ||
158 | /* Entry 1 is the FORWARD hook */ | ||
159 | initial_table.entries[1].target.verdict = -forward - 1; | ||
160 | |||
161 | ret = register_pernet_subsys(&iptable_filter_net_ops); | 96 | ret = register_pernet_subsys(&iptable_filter_net_ops); |
162 | if (ret < 0) | 97 | if (ret < 0) |
163 | return ret; | 98 | return ret; |
164 | 99 | ||
165 | /* Register hooks */ | 100 | /* Register hooks */ |
166 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 101 | filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); |
167 | if (ret < 0) | 102 | if (IS_ERR(filter_ops)) { |
103 | ret = PTR_ERR(filter_ops); | ||
168 | goto cleanup_table; | 104 | goto cleanup_table; |
105 | } | ||
169 | 106 | ||
170 | return ret; | 107 | return ret; |
171 | 108 | ||
@@ -176,7 +113,7 @@ static int __init iptable_filter_init(void) | |||
176 | 113 | ||
177 | static void __exit iptable_filter_fini(void) | 114 | static void __exit iptable_filter_fini(void) |
178 | { | 115 | { |
179 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 116 | xt_hook_unlink(&packet_filter, filter_ops); |
180 | unregister_pernet_subsys(&iptable_filter_net_ops); | 117 | unregister_pernet_subsys(&iptable_filter_net_ops); |
181 | } | 118 | } |
182 | 119 | ||
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 036047f9b0f2..294a2a32f293 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/netfilter_ipv4/ip_tables.h> | 12 | #include <linux/netfilter_ipv4/ip_tables.h> |
13 | #include <linux/netdevice.h> | 13 | #include <linux/netdevice.h> |
14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
15 | #include <linux/slab.h> | ||
15 | #include <net/sock.h> | 16 | #include <net/sock.h> |
16 | #include <net/route.h> | 17 | #include <net/route.h> |
17 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
@@ -27,101 +28,16 @@ MODULE_DESCRIPTION("iptables mangle table"); | |||
27 | (1 << NF_INET_LOCAL_OUT) | \ | 28 | (1 << NF_INET_LOCAL_OUT) | \ |
28 | (1 << NF_INET_POST_ROUTING)) | 29 | (1 << NF_INET_POST_ROUTING)) |
29 | 30 | ||
30 | /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ | ||
31 | static const struct | ||
32 | { | ||
33 | struct ipt_replace repl; | ||
34 | struct ipt_standard entries[5]; | ||
35 | struct ipt_error term; | ||
36 | } initial_table __net_initdata = { | ||
37 | .repl = { | ||
38 | .name = "mangle", | ||
39 | .valid_hooks = MANGLE_VALID_HOOKS, | ||
40 | .num_entries = 6, | ||
41 | .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), | ||
42 | .hook_entry = { | ||
43 | [NF_INET_PRE_ROUTING] = 0, | ||
44 | [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), | ||
45 | [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, | ||
46 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, | ||
47 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, | ||
48 | }, | ||
49 | .underflow = { | ||
50 | [NF_INET_PRE_ROUTING] = 0, | ||
51 | [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), | ||
52 | [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, | ||
53 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, | ||
54 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, | ||
55 | }, | ||
56 | }, | ||
57 | .entries = { | ||
58 | IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ | ||
59 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ | ||
60 | IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ | ||
61 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
62 | IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ | ||
63 | }, | ||
64 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
65 | }; | ||
66 | |||
67 | static const struct xt_table packet_mangler = { | 31 | static const struct xt_table packet_mangler = { |
68 | .name = "mangle", | 32 | .name = "mangle", |
69 | .valid_hooks = MANGLE_VALID_HOOKS, | 33 | .valid_hooks = MANGLE_VALID_HOOKS, |
70 | .me = THIS_MODULE, | 34 | .me = THIS_MODULE, |
71 | .af = NFPROTO_IPV4, | 35 | .af = NFPROTO_IPV4, |
36 | .priority = NF_IP_PRI_MANGLE, | ||
72 | }; | 37 | }; |
73 | 38 | ||
74 | /* The work comes in here from netfilter.c. */ | ||
75 | static unsigned int | ||
76 | ipt_pre_routing_hook(unsigned int hook, | ||
77 | struct sk_buff *skb, | ||
78 | const struct net_device *in, | ||
79 | const struct net_device *out, | ||
80 | int (*okfn)(struct sk_buff *)) | ||
81 | { | ||
82 | return ipt_do_table(skb, hook, in, out, | ||
83 | dev_net(in)->ipv4.iptable_mangle); | ||
84 | } | ||
85 | |||
86 | static unsigned int | ||
87 | ipt_post_routing_hook(unsigned int hook, | ||
88 | struct sk_buff *skb, | ||
89 | const struct net_device *in, | ||
90 | const struct net_device *out, | ||
91 | int (*okfn)(struct sk_buff *)) | ||
92 | { | ||
93 | return ipt_do_table(skb, hook, in, out, | ||
94 | dev_net(out)->ipv4.iptable_mangle); | ||
95 | } | ||
96 | |||
97 | static unsigned int | ||
98 | ipt_local_in_hook(unsigned int hook, | ||
99 | struct sk_buff *skb, | ||
100 | const struct net_device *in, | ||
101 | const struct net_device *out, | ||
102 | int (*okfn)(struct sk_buff *)) | ||
103 | { | ||
104 | return ipt_do_table(skb, hook, in, out, | ||
105 | dev_net(in)->ipv4.iptable_mangle); | ||
106 | } | ||
107 | |||
108 | static unsigned int | ||
109 | ipt_forward_hook(unsigned int hook, | ||
110 | struct sk_buff *skb, | ||
111 | const struct net_device *in, | ||
112 | const struct net_device *out, | ||
113 | int (*okfn)(struct sk_buff *)) | ||
114 | { | ||
115 | return ipt_do_table(skb, hook, in, out, | ||
116 | dev_net(in)->ipv4.iptable_mangle); | ||
117 | } | ||
118 | |||
119 | static unsigned int | 39 | static unsigned int |
120 | ipt_local_hook(unsigned int hook, | 40 | ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) |
121 | struct sk_buff *skb, | ||
122 | const struct net_device *in, | ||
123 | const struct net_device *out, | ||
124 | int (*okfn)(struct sk_buff *)) | ||
125 | { | 41 | { |
126 | unsigned int ret; | 42 | unsigned int ret; |
127 | const struct iphdr *iph; | 43 | const struct iphdr *iph; |
@@ -130,8 +46,8 @@ ipt_local_hook(unsigned int hook, | |||
130 | u_int32_t mark; | 46 | u_int32_t mark; |
131 | 47 | ||
132 | /* root is playing with raw sockets. */ | 48 | /* root is playing with raw sockets. */ |
133 | if (skb->len < sizeof(struct iphdr) | 49 | if (skb->len < sizeof(struct iphdr) || |
134 | || ip_hdrlen(skb) < sizeof(struct iphdr)) | 50 | ip_hdrlen(skb) < sizeof(struct iphdr)) |
135 | return NF_ACCEPT; | 51 | return NF_ACCEPT; |
136 | 52 | ||
137 | /* Save things which could affect route */ | 53 | /* Save things which could affect route */ |
@@ -141,7 +57,7 @@ ipt_local_hook(unsigned int hook, | |||
141 | daddr = iph->daddr; | 57 | daddr = iph->daddr; |
142 | tos = iph->tos; | 58 | tos = iph->tos; |
143 | 59 | ||
144 | ret = ipt_do_table(skb, hook, in, out, | 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, |
145 | dev_net(out)->ipv4.iptable_mangle); | 61 | dev_net(out)->ipv4.iptable_mangle); |
146 | /* Reroute for ANY change. */ | 62 | /* Reroute for ANY change. */ |
147 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 63 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { |
@@ -158,49 +74,36 @@ ipt_local_hook(unsigned int hook, | |||
158 | return ret; | 74 | return ret; |
159 | } | 75 | } |
160 | 76 | ||
161 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 77 | /* The work comes in here from netfilter.c. */ |
162 | { | 78 | static unsigned int |
163 | .hook = ipt_pre_routing_hook, | 79 | iptable_mangle_hook(unsigned int hook, |
164 | .owner = THIS_MODULE, | 80 | struct sk_buff *skb, |
165 | .pf = NFPROTO_IPV4, | 81 | const struct net_device *in, |
166 | .hooknum = NF_INET_PRE_ROUTING, | 82 | const struct net_device *out, |
167 | .priority = NF_IP_PRI_MANGLE, | 83 | int (*okfn)(struct sk_buff *)) |
168 | }, | 84 | { |
169 | { | 85 | if (hook == NF_INET_LOCAL_OUT) |
170 | .hook = ipt_local_in_hook, | 86 | return ipt_mangle_out(skb, out); |
171 | .owner = THIS_MODULE, | 87 | if (hook == NF_INET_POST_ROUTING) |
172 | .pf = NFPROTO_IPV4, | 88 | return ipt_do_table(skb, hook, in, out, |
173 | .hooknum = NF_INET_LOCAL_IN, | 89 | dev_net(out)->ipv4.iptable_mangle); |
174 | .priority = NF_IP_PRI_MANGLE, | 90 | /* PREROUTING/INPUT/FORWARD: */ |
175 | }, | 91 | return ipt_do_table(skb, hook, in, out, |
176 | { | 92 | dev_net(in)->ipv4.iptable_mangle); |
177 | .hook = ipt_forward_hook, | 93 | } |
178 | .owner = THIS_MODULE, | 94 | |
179 | .pf = NFPROTO_IPV4, | 95 | static struct nf_hook_ops *mangle_ops __read_mostly; |
180 | .hooknum = NF_INET_FORWARD, | ||
181 | .priority = NF_IP_PRI_MANGLE, | ||
182 | }, | ||
183 | { | ||
184 | .hook = ipt_local_hook, | ||
185 | .owner = THIS_MODULE, | ||
186 | .pf = NFPROTO_IPV4, | ||
187 | .hooknum = NF_INET_LOCAL_OUT, | ||
188 | .priority = NF_IP_PRI_MANGLE, | ||
189 | }, | ||
190 | { | ||
191 | .hook = ipt_post_routing_hook, | ||
192 | .owner = THIS_MODULE, | ||
193 | .pf = NFPROTO_IPV4, | ||
194 | .hooknum = NF_INET_POST_ROUTING, | ||
195 | .priority = NF_IP_PRI_MANGLE, | ||
196 | }, | ||
197 | }; | ||
198 | 96 | ||
199 | static int __net_init iptable_mangle_net_init(struct net *net) | 97 | static int __net_init iptable_mangle_net_init(struct net *net) |
200 | { | 98 | { |
201 | /* Register table */ | 99 | struct ipt_replace *repl; |
100 | |||
101 | repl = ipt_alloc_initial_table(&packet_mangler); | ||
102 | if (repl == NULL) | ||
103 | return -ENOMEM; | ||
202 | net->ipv4.iptable_mangle = | 104 | net->ipv4.iptable_mangle = |
203 | ipt_register_table(net, &packet_mangler, &initial_table.repl); | 105 | ipt_register_table(net, &packet_mangler, repl); |
106 | kfree(repl); | ||
204 | if (IS_ERR(net->ipv4.iptable_mangle)) | 107 | if (IS_ERR(net->ipv4.iptable_mangle)) |
205 | return PTR_ERR(net->ipv4.iptable_mangle); | 108 | return PTR_ERR(net->ipv4.iptable_mangle); |
206 | return 0; | 109 | return 0; |
@@ -208,7 +111,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) | |||
208 | 111 | ||
209 | static void __net_exit iptable_mangle_net_exit(struct net *net) | 112 | static void __net_exit iptable_mangle_net_exit(struct net *net) |
210 | { | 113 | { |
211 | ipt_unregister_table(net->ipv4.iptable_mangle); | 114 | ipt_unregister_table(net, net->ipv4.iptable_mangle); |
212 | } | 115 | } |
213 | 116 | ||
214 | static struct pernet_operations iptable_mangle_net_ops = { | 117 | static struct pernet_operations iptable_mangle_net_ops = { |
@@ -225,9 +128,11 @@ static int __init iptable_mangle_init(void) | |||
225 | return ret; | 128 | return ret; |
226 | 129 | ||
227 | /* Register hooks */ | 130 | /* Register hooks */ |
228 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 131 | mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); |
229 | if (ret < 0) | 132 | if (IS_ERR(mangle_ops)) { |
133 | ret = PTR_ERR(mangle_ops); | ||
230 | goto cleanup_table; | 134 | goto cleanup_table; |
135 | } | ||
231 | 136 | ||
232 | return ret; | 137 | return ret; |
233 | 138 | ||
@@ -238,7 +143,7 @@ static int __init iptable_mangle_init(void) | |||
238 | 143 | ||
239 | static void __exit iptable_mangle_fini(void) | 144 | static void __exit iptable_mangle_fini(void) |
240 | { | 145 | { |
241 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 146 | xt_hook_unlink(&packet_mangler, mangle_ops); |
242 | unregister_pernet_subsys(&iptable_mangle_net_ops); | 147 | unregister_pernet_subsys(&iptable_mangle_net_ops); |
243 | } | 148 | } |
244 | 149 | ||
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23be09..07fb710cd722 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -5,94 +5,49 @@ | |||
5 | */ | 5 | */ |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/netfilter_ipv4/ip_tables.h> | 7 | #include <linux/netfilter_ipv4/ip_tables.h> |
8 | #include <linux/slab.h> | ||
8 | #include <net/ip.h> | 9 | #include <net/ip.h> |
9 | 10 | ||
10 | #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) | 11 | #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) |
11 | 12 | ||
12 | static const struct | ||
13 | { | ||
14 | struct ipt_replace repl; | ||
15 | struct ipt_standard entries[2]; | ||
16 | struct ipt_error term; | ||
17 | } initial_table __net_initdata = { | ||
18 | .repl = { | ||
19 | .name = "raw", | ||
20 | .valid_hooks = RAW_VALID_HOOKS, | ||
21 | .num_entries = 3, | ||
22 | .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), | ||
23 | .hook_entry = { | ||
24 | [NF_INET_PRE_ROUTING] = 0, | ||
25 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) | ||
26 | }, | ||
27 | .underflow = { | ||
28 | [NF_INET_PRE_ROUTING] = 0, | ||
29 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) | ||
30 | }, | ||
31 | }, | ||
32 | .entries = { | ||
33 | IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ | ||
34 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
35 | }, | ||
36 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
37 | }; | ||
38 | |||
39 | static const struct xt_table packet_raw = { | 13 | static const struct xt_table packet_raw = { |
40 | .name = "raw", | 14 | .name = "raw", |
41 | .valid_hooks = RAW_VALID_HOOKS, | 15 | .valid_hooks = RAW_VALID_HOOKS, |
42 | .me = THIS_MODULE, | 16 | .me = THIS_MODULE, |
43 | .af = NFPROTO_IPV4, | 17 | .af = NFPROTO_IPV4, |
18 | .priority = NF_IP_PRI_RAW, | ||
44 | }; | 19 | }; |
45 | 20 | ||
46 | /* The work comes in here from netfilter.c. */ | 21 | /* The work comes in here from netfilter.c. */ |
47 | static unsigned int | 22 | static unsigned int |
48 | ipt_hook(unsigned int hook, | 23 | iptable_raw_hook(unsigned int hook, struct sk_buff *skb, |
49 | struct sk_buff *skb, | 24 | const struct net_device *in, const struct net_device *out, |
50 | const struct net_device *in, | 25 | int (*okfn)(struct sk_buff *)) |
51 | const struct net_device *out, | ||
52 | int (*okfn)(struct sk_buff *)) | ||
53 | { | 26 | { |
54 | return ipt_do_table(skb, hook, in, out, | 27 | const struct net *net; |
55 | dev_net(in)->ipv4.iptable_raw); | ||
56 | } | ||
57 | 28 | ||
58 | static unsigned int | 29 | if (hook == NF_INET_LOCAL_OUT && |
59 | ipt_local_hook(unsigned int hook, | 30 | (skb->len < sizeof(struct iphdr) || |
60 | struct sk_buff *skb, | 31 | ip_hdrlen(skb) < sizeof(struct iphdr))) |
61 | const struct net_device *in, | 32 | /* root is playing with raw sockets. */ |
62 | const struct net_device *out, | ||
63 | int (*okfn)(struct sk_buff *)) | ||
64 | { | ||
65 | /* root is playing with raw sockets. */ | ||
66 | if (skb->len < sizeof(struct iphdr) || | ||
67 | ip_hdrlen(skb) < sizeof(struct iphdr)) | ||
68 | return NF_ACCEPT; | 33 | return NF_ACCEPT; |
69 | return ipt_do_table(skb, hook, in, out, | 34 | |
70 | dev_net(out)->ipv4.iptable_raw); | 35 | net = dev_net((in != NULL) ? in : out); |
36 | return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); | ||
71 | } | 37 | } |
72 | 38 | ||
73 | /* 'raw' is the very first table. */ | 39 | static struct nf_hook_ops *rawtable_ops __read_mostly; |
74 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | ||
75 | { | ||
76 | .hook = ipt_hook, | ||
77 | .pf = NFPROTO_IPV4, | ||
78 | .hooknum = NF_INET_PRE_ROUTING, | ||
79 | .priority = NF_IP_PRI_RAW, | ||
80 | .owner = THIS_MODULE, | ||
81 | }, | ||
82 | { | ||
83 | .hook = ipt_local_hook, | ||
84 | .pf = NFPROTO_IPV4, | ||
85 | .hooknum = NF_INET_LOCAL_OUT, | ||
86 | .priority = NF_IP_PRI_RAW, | ||
87 | .owner = THIS_MODULE, | ||
88 | }, | ||
89 | }; | ||
90 | 40 | ||
91 | static int __net_init iptable_raw_net_init(struct net *net) | 41 | static int __net_init iptable_raw_net_init(struct net *net) |
92 | { | 42 | { |
93 | /* Register table */ | 43 | struct ipt_replace *repl; |
44 | |||
45 | repl = ipt_alloc_initial_table(&packet_raw); | ||
46 | if (repl == NULL) | ||
47 | return -ENOMEM; | ||
94 | net->ipv4.iptable_raw = | 48 | net->ipv4.iptable_raw = |
95 | ipt_register_table(net, &packet_raw, &initial_table.repl); | 49 | ipt_register_table(net, &packet_raw, repl); |
50 | kfree(repl); | ||
96 | if (IS_ERR(net->ipv4.iptable_raw)) | 51 | if (IS_ERR(net->ipv4.iptable_raw)) |
97 | return PTR_ERR(net->ipv4.iptable_raw); | 52 | return PTR_ERR(net->ipv4.iptable_raw); |
98 | return 0; | 53 | return 0; |
@@ -100,7 +55,7 @@ static int __net_init iptable_raw_net_init(struct net *net) | |||
100 | 55 | ||
101 | static void __net_exit iptable_raw_net_exit(struct net *net) | 56 | static void __net_exit iptable_raw_net_exit(struct net *net) |
102 | { | 57 | { |
103 | ipt_unregister_table(net->ipv4.iptable_raw); | 58 | ipt_unregister_table(net, net->ipv4.iptable_raw); |
104 | } | 59 | } |
105 | 60 | ||
106 | static struct pernet_operations iptable_raw_net_ops = { | 61 | static struct pernet_operations iptable_raw_net_ops = { |
@@ -117,9 +72,11 @@ static int __init iptable_raw_init(void) | |||
117 | return ret; | 72 | return ret; |
118 | 73 | ||
119 | /* Register hooks */ | 74 | /* Register hooks */ |
120 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 75 | rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); |
121 | if (ret < 0) | 76 | if (IS_ERR(rawtable_ops)) { |
77 | ret = PTR_ERR(rawtable_ops); | ||
122 | goto cleanup_table; | 78 | goto cleanup_table; |
79 | } | ||
123 | 80 | ||
124 | return ret; | 81 | return ret; |
125 | 82 | ||
@@ -130,7 +87,7 @@ static int __init iptable_raw_init(void) | |||
130 | 87 | ||
131 | static void __exit iptable_raw_fini(void) | 88 | static void __exit iptable_raw_fini(void) |
132 | { | 89 | { |
133 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 90 | xt_hook_unlink(&packet_raw, rawtable_ops); |
134 | unregister_pernet_subsys(&iptable_raw_net_ops); | 91 | unregister_pernet_subsys(&iptable_raw_net_ops); |
135 | } | 92 | } |
136 | 93 | ||
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 99eb76c65d25..be45bdc4c602 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/netfilter_ipv4/ip_tables.h> | 19 | #include <linux/netfilter_ipv4/ip_tables.h> |
20 | #include <linux/slab.h> | ||
20 | #include <net/ip.h> | 21 | #include <net/ip.h> |
21 | 22 | ||
22 | MODULE_LICENSE("GPL"); | 23 | MODULE_LICENSE("GPL"); |
@@ -27,109 +28,44 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); | |||
27 | (1 << NF_INET_FORWARD) | \ | 28 | (1 << NF_INET_FORWARD) | \ |
28 | (1 << NF_INET_LOCAL_OUT) | 29 | (1 << NF_INET_LOCAL_OUT) |
29 | 30 | ||
30 | static const struct | ||
31 | { | ||
32 | struct ipt_replace repl; | ||
33 | struct ipt_standard entries[3]; | ||
34 | struct ipt_error term; | ||
35 | } initial_table __net_initdata = { | ||
36 | .repl = { | ||
37 | .name = "security", | ||
38 | .valid_hooks = SECURITY_VALID_HOOKS, | ||
39 | .num_entries = 4, | ||
40 | .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
41 | .hook_entry = { | ||
42 | [NF_INET_LOCAL_IN] = 0, | ||
43 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
44 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
45 | }, | ||
46 | .underflow = { | ||
47 | [NF_INET_LOCAL_IN] = 0, | ||
48 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
49 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
50 | }, | ||
51 | }, | ||
52 | .entries = { | ||
53 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ | ||
54 | IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ | ||
55 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
56 | }, | ||
57 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
58 | }; | ||
59 | |||
60 | static const struct xt_table security_table = { | 31 | static const struct xt_table security_table = { |
61 | .name = "security", | 32 | .name = "security", |
62 | .valid_hooks = SECURITY_VALID_HOOKS, | 33 | .valid_hooks = SECURITY_VALID_HOOKS, |
63 | .me = THIS_MODULE, | 34 | .me = THIS_MODULE, |
64 | .af = NFPROTO_IPV4, | 35 | .af = NFPROTO_IPV4, |
36 | .priority = NF_IP_PRI_SECURITY, | ||
65 | }; | 37 | }; |
66 | 38 | ||
67 | static unsigned int | 39 | static unsigned int |
68 | ipt_local_in_hook(unsigned int hook, | 40 | iptable_security_hook(unsigned int hook, struct sk_buff *skb, |
69 | struct sk_buff *skb, | 41 | const struct net_device *in, |
70 | const struct net_device *in, | 42 | const struct net_device *out, |
71 | const struct net_device *out, | 43 | int (*okfn)(struct sk_buff *)) |
72 | int (*okfn)(struct sk_buff *)) | ||
73 | { | ||
74 | return ipt_do_table(skb, hook, in, out, | ||
75 | dev_net(in)->ipv4.iptable_security); | ||
76 | } | ||
77 | |||
78 | static unsigned int | ||
79 | ipt_forward_hook(unsigned int hook, | ||
80 | struct sk_buff *skb, | ||
81 | const struct net_device *in, | ||
82 | const struct net_device *out, | ||
83 | int (*okfn)(struct sk_buff *)) | ||
84 | { | 44 | { |
85 | return ipt_do_table(skb, hook, in, out, | 45 | const struct net *net; |
86 | dev_net(in)->ipv4.iptable_security); | ||
87 | } | ||
88 | 46 | ||
89 | static unsigned int | 47 | if (hook == NF_INET_LOCAL_OUT && |
90 | ipt_local_out_hook(unsigned int hook, | 48 | (skb->len < sizeof(struct iphdr) || |
91 | struct sk_buff *skb, | 49 | ip_hdrlen(skb) < sizeof(struct iphdr))) |
92 | const struct net_device *in, | 50 | /* Somebody is playing with raw sockets. */ |
93 | const struct net_device *out, | ||
94 | int (*okfn)(struct sk_buff *)) | ||
95 | { | ||
96 | /* Somebody is playing with raw sockets. */ | ||
97 | if (skb->len < sizeof(struct iphdr) | ||
98 | || ip_hdrlen(skb) < sizeof(struct iphdr)) | ||
99 | return NF_ACCEPT; | 51 | return NF_ACCEPT; |
100 | return ipt_do_table(skb, hook, in, out, | 52 | |
101 | dev_net(out)->ipv4.iptable_security); | 53 | net = dev_net((in != NULL) ? in : out); |
54 | return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); | ||
102 | } | 55 | } |
103 | 56 | ||
104 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 57 | static struct nf_hook_ops *sectbl_ops __read_mostly; |
105 | { | ||
106 | .hook = ipt_local_in_hook, | ||
107 | .owner = THIS_MODULE, | ||
108 | .pf = NFPROTO_IPV4, | ||
109 | .hooknum = NF_INET_LOCAL_IN, | ||
110 | .priority = NF_IP_PRI_SECURITY, | ||
111 | }, | ||
112 | { | ||
113 | .hook = ipt_forward_hook, | ||
114 | .owner = THIS_MODULE, | ||
115 | .pf = NFPROTO_IPV4, | ||
116 | .hooknum = NF_INET_FORWARD, | ||
117 | .priority = NF_IP_PRI_SECURITY, | ||
118 | }, | ||
119 | { | ||
120 | .hook = ipt_local_out_hook, | ||
121 | .owner = THIS_MODULE, | ||
122 | .pf = NFPROTO_IPV4, | ||
123 | .hooknum = NF_INET_LOCAL_OUT, | ||
124 | .priority = NF_IP_PRI_SECURITY, | ||
125 | }, | ||
126 | }; | ||
127 | 58 | ||
128 | static int __net_init iptable_security_net_init(struct net *net) | 59 | static int __net_init iptable_security_net_init(struct net *net) |
129 | { | 60 | { |
130 | net->ipv4.iptable_security = | 61 | struct ipt_replace *repl; |
131 | ipt_register_table(net, &security_table, &initial_table.repl); | ||
132 | 62 | ||
63 | repl = ipt_alloc_initial_table(&security_table); | ||
64 | if (repl == NULL) | ||
65 | return -ENOMEM; | ||
66 | net->ipv4.iptable_security = | ||
67 | ipt_register_table(net, &security_table, repl); | ||
68 | kfree(repl); | ||
133 | if (IS_ERR(net->ipv4.iptable_security)) | 69 | if (IS_ERR(net->ipv4.iptable_security)) |
134 | return PTR_ERR(net->ipv4.iptable_security); | 70 | return PTR_ERR(net->ipv4.iptable_security); |
135 | 71 | ||
@@ -138,7 +74,7 @@ static int __net_init iptable_security_net_init(struct net *net) | |||
138 | 74 | ||
139 | static void __net_exit iptable_security_net_exit(struct net *net) | 75 | static void __net_exit iptable_security_net_exit(struct net *net) |
140 | { | 76 | { |
141 | ipt_unregister_table(net->ipv4.iptable_security); | 77 | ipt_unregister_table(net, net->ipv4.iptable_security); |
142 | } | 78 | } |
143 | 79 | ||
144 | static struct pernet_operations iptable_security_net_ops = { | 80 | static struct pernet_operations iptable_security_net_ops = { |
@@ -154,9 +90,11 @@ static int __init iptable_security_init(void) | |||
154 | if (ret < 0) | 90 | if (ret < 0) |
155 | return ret; | 91 | return ret; |
156 | 92 | ||
157 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 93 | sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); |
158 | if (ret < 0) | 94 | if (IS_ERR(sectbl_ops)) { |
95 | ret = PTR_ERR(sectbl_ops); | ||
159 | goto cleanup_table; | 96 | goto cleanup_table; |
97 | } | ||
160 | 98 | ||
161 | return ret; | 99 | return ret; |
162 | 100 | ||
@@ -167,7 +105,7 @@ cleanup_table: | |||
167 | 105 | ||
168 | static void __exit iptable_security_fini(void) | 106 | static void __exit iptable_security_fini(void) |
169 | { | 107 | { |
170 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 108 | xt_hook_unlink(&security_table, sectbl_ops); |
171 | unregister_pernet_subsys(&iptable_security_net_ops); | 109 | unregister_pernet_subsys(&iptable_security_net_ops); |
172 | } | 110 | } |
173 | 111 | ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index aa95bb82ee6c..2bb1f87051c4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <net/netfilter/nf_conntrack_helper.h> | 22 | #include <net/netfilter/nf_conntrack_helper.h> |
23 | #include <net/netfilter/nf_conntrack_l4proto.h> | 23 | #include <net/netfilter/nf_conntrack_l4proto.h> |
24 | #include <net/netfilter/nf_conntrack_l3proto.h> | 24 | #include <net/netfilter/nf_conntrack_l3proto.h> |
25 | #include <net/netfilter/nf_conntrack_zones.h> | ||
25 | #include <net/netfilter/nf_conntrack_core.h> | 26 | #include <net/netfilter/nf_conntrack_core.h> |
26 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 27 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
27 | #include <net/netfilter/nf_nat_helper.h> | 28 | #include <net/netfilter/nf_nat_helper.h> |
@@ -195,7 +196,6 @@ static int log_invalid_proto_max = 255; | |||
195 | 196 | ||
196 | static ctl_table ip_ct_sysctl_table[] = { | 197 | static ctl_table ip_ct_sysctl_table[] = { |
197 | { | 198 | { |
198 | .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, | ||
199 | .procname = "ip_conntrack_max", | 199 | .procname = "ip_conntrack_max", |
200 | .data = &nf_conntrack_max, | 200 | .data = &nf_conntrack_max, |
201 | .maxlen = sizeof(int), | 201 | .maxlen = sizeof(int), |
@@ -203,7 +203,6 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
203 | .proc_handler = proc_dointvec, | 203 | .proc_handler = proc_dointvec, |
204 | }, | 204 | }, |
205 | { | 205 | { |
206 | .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, | ||
207 | .procname = "ip_conntrack_count", | 206 | .procname = "ip_conntrack_count", |
208 | .data = &init_net.ct.count, | 207 | .data = &init_net.ct.count, |
209 | .maxlen = sizeof(int), | 208 | .maxlen = sizeof(int), |
@@ -211,15 +210,13 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
211 | .proc_handler = proc_dointvec, | 210 | .proc_handler = proc_dointvec, |
212 | }, | 211 | }, |
213 | { | 212 | { |
214 | .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, | ||
215 | .procname = "ip_conntrack_buckets", | 213 | .procname = "ip_conntrack_buckets", |
216 | .data = &nf_conntrack_htable_size, | 214 | .data = &init_net.ct.htable_size, |
217 | .maxlen = sizeof(unsigned int), | 215 | .maxlen = sizeof(unsigned int), |
218 | .mode = 0444, | 216 | .mode = 0444, |
219 | .proc_handler = proc_dointvec, | 217 | .proc_handler = proc_dointvec, |
220 | }, | 218 | }, |
221 | { | 219 | { |
222 | .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, | ||
223 | .procname = "ip_conntrack_checksum", | 220 | .procname = "ip_conntrack_checksum", |
224 | .data = &init_net.ct.sysctl_checksum, | 221 | .data = &init_net.ct.sysctl_checksum, |
225 | .maxlen = sizeof(int), | 222 | .maxlen = sizeof(int), |
@@ -227,19 +224,15 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
227 | .proc_handler = proc_dointvec, | 224 | .proc_handler = proc_dointvec, |
228 | }, | 225 | }, |
229 | { | 226 | { |
230 | .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, | ||
231 | .procname = "ip_conntrack_log_invalid", | 227 | .procname = "ip_conntrack_log_invalid", |
232 | .data = &init_net.ct.sysctl_log_invalid, | 228 | .data = &init_net.ct.sysctl_log_invalid, |
233 | .maxlen = sizeof(unsigned int), | 229 | .maxlen = sizeof(unsigned int), |
234 | .mode = 0644, | 230 | .mode = 0644, |
235 | .proc_handler = proc_dointvec_minmax, | 231 | .proc_handler = proc_dointvec_minmax, |
236 | .strategy = sysctl_intvec, | ||
237 | .extra1 = &log_invalid_proto_min, | 232 | .extra1 = &log_invalid_proto_min, |
238 | .extra2 = &log_invalid_proto_max, | 233 | .extra2 = &log_invalid_proto_max, |
239 | }, | 234 | }, |
240 | { | 235 | { } |
241 | .ctl_name = 0 | ||
242 | } | ||
243 | }; | 236 | }; |
244 | #endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ | 237 | #endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ |
245 | 238 | ||
@@ -255,10 +248,10 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
255 | struct nf_conntrack_tuple tuple; | 248 | struct nf_conntrack_tuple tuple; |
256 | 249 | ||
257 | memset(&tuple, 0, sizeof(tuple)); | 250 | memset(&tuple, 0, sizeof(tuple)); |
258 | tuple.src.u3.ip = inet->rcv_saddr; | 251 | tuple.src.u3.ip = inet->inet_rcv_saddr; |
259 | tuple.src.u.tcp.port = inet->sport; | 252 | tuple.src.u.tcp.port = inet->inet_sport; |
260 | tuple.dst.u3.ip = inet->daddr; | 253 | tuple.dst.u3.ip = inet->inet_daddr; |
261 | tuple.dst.u.tcp.port = inet->dport; | 254 | tuple.dst.u.tcp.port = inet->inet_dport; |
262 | tuple.src.l3num = PF_INET; | 255 | tuple.src.l3num = PF_INET; |
263 | tuple.dst.protonum = sk->sk_protocol; | 256 | tuple.dst.protonum = sk->sk_protocol; |
264 | 257 | ||
@@ -274,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
274 | return -EINVAL; | 267 | return -EINVAL; |
275 | } | 268 | } |
276 | 269 | ||
277 | h = nf_conntrack_find_get(sock_net(sk), &tuple); | 270 | h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); |
278 | if (h) { | 271 | if (h) { |
279 | struct sockaddr_in sin; | 272 | struct sockaddr_in sin; |
280 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | 273 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
32 | struct hlist_nulls_node *n; | 32 | struct hlist_nulls_node *n; |
33 | 33 | ||
34 | for (st->bucket = 0; | 34 | for (st->bucket = 0; |
35 | st->bucket < nf_conntrack_htable_size; | 35 | st->bucket < net->ct.htable_size; |
36 | st->bucket++) { | 36 | st->bucket++) { |
37 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 37 | n = rcu_dereference(net->ct.hash[st->bucket].first); |
38 | if (!is_a_nulls(n)) | 38 | if (!is_a_nulls(n)) |
@@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
50 | head = rcu_dereference(head->next); | 50 | head = rcu_dereference(head->next); |
51 | while (is_a_nulls(head)) { | 51 | while (is_a_nulls(head)) { |
52 | if (likely(get_nulls_value(head) == st->bucket)) { | 52 | if (likely(get_nulls_value(head) == st->bucket)) { |
53 | if (++st->bucket >= nf_conntrack_htable_size) | 53 | if (++st->bucket >= net->ct.htable_size) |
54 | return NULL; | 54 | return NULL; |
55 | } | 55 | } |
56 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 56 | head = rcu_dereference(net->ct.hash[st->bucket].first); |
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d71ba7677344..7404bde95994 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <net/netfilter/nf_conntrack_tuple.h> | 18 | #include <net/netfilter/nf_conntrack_tuple.h> |
19 | #include <net/netfilter/nf_conntrack_l4proto.h> | 19 | #include <net/netfilter/nf_conntrack_l4proto.h> |
20 | #include <net/netfilter/nf_conntrack_core.h> | 20 | #include <net/netfilter/nf_conntrack_core.h> |
21 | #include <net/netfilter/nf_conntrack_zones.h> | ||
21 | #include <net/netfilter/nf_log.h> | 22 | #include <net/netfilter/nf_log.h> |
22 | 23 | ||
23 | static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; | 24 | static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; |
@@ -54,8 +55,8 @@ static const u_int8_t invmap[] = { | |||
54 | static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, | 55 | static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, |
55 | const struct nf_conntrack_tuple *orig) | 56 | const struct nf_conntrack_tuple *orig) |
56 | { | 57 | { |
57 | if (orig->dst.u.icmp.type >= sizeof(invmap) | 58 | if (orig->dst.u.icmp.type >= sizeof(invmap) || |
58 | || !invmap[orig->dst.u.icmp.type]) | 59 | !invmap[orig->dst.u.icmp.type]) |
59 | return false; | 60 | return false; |
60 | 61 | ||
61 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | 62 | tuple->src.u.icmp.id = orig->src.u.icmp.id; |
@@ -101,8 +102,8 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
101 | [ICMP_ADDRESS] = 1 | 102 | [ICMP_ADDRESS] = 1 |
102 | }; | 103 | }; |
103 | 104 | ||
104 | if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) | 105 | if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || |
105 | || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { | 106 | !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { |
106 | /* Can't create a new ICMP `conn' with this. */ | 107 | /* Can't create a new ICMP `conn' with this. */ |
107 | pr_debug("icmp: can't create new conn with type %u\n", | 108 | pr_debug("icmp: can't create new conn with type %u\n", |
108 | ct->tuplehash[0].tuple.dst.u.icmp.type); | 109 | ct->tuplehash[0].tuple.dst.u.icmp.type); |
@@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
114 | 115 | ||
115 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ | 116 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ |
116 | static int | 117 | static int |
117 | icmp_error_message(struct net *net, struct sk_buff *skb, | 118 | icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, |
118 | enum ip_conntrack_info *ctinfo, | 119 | enum ip_conntrack_info *ctinfo, |
119 | unsigned int hooknum) | 120 | unsigned int hooknum) |
120 | { | 121 | { |
121 | struct nf_conntrack_tuple innertuple, origtuple; | 122 | struct nf_conntrack_tuple innertuple, origtuple; |
122 | const struct nf_conntrack_l4proto *innerproto; | 123 | const struct nf_conntrack_l4proto *innerproto; |
123 | const struct nf_conntrack_tuple_hash *h; | 124 | const struct nf_conntrack_tuple_hash *h; |
125 | u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; | ||
124 | 126 | ||
125 | NF_CT_ASSERT(skb->nfct == NULL); | 127 | NF_CT_ASSERT(skb->nfct == NULL); |
126 | 128 | ||
@@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, | |||
146 | 148 | ||
147 | *ctinfo = IP_CT_RELATED; | 149 | *ctinfo = IP_CT_RELATED; |
148 | 150 | ||
149 | h = nf_conntrack_find_get(net, &innertuple); | 151 | h = nf_conntrack_find_get(net, zone, &innertuple); |
150 | if (!h) { | 152 | if (!h) { |
151 | pr_debug("icmp_error_message: no match\n"); | 153 | pr_debug("icmp_error_message: no match\n"); |
152 | return -NF_ACCEPT; | 154 | return -NF_ACCEPT; |
@@ -163,7 +165,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, | |||
163 | 165 | ||
164 | /* Small and modified version of icmp_rcv */ | 166 | /* Small and modified version of icmp_rcv */ |
165 | static int | 167 | static int |
166 | icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, | 168 | icmp_error(struct net *net, struct nf_conn *tmpl, |
169 | struct sk_buff *skb, unsigned int dataoff, | ||
167 | enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) | 170 | enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) |
168 | { | 171 | { |
169 | const struct icmphdr *icmph; | 172 | const struct icmphdr *icmph; |
@@ -201,14 +204,14 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, | |||
201 | } | 204 | } |
202 | 205 | ||
203 | /* Need to track icmp error message? */ | 206 | /* Need to track icmp error message? */ |
204 | if (icmph->type != ICMP_DEST_UNREACH | 207 | if (icmph->type != ICMP_DEST_UNREACH && |
205 | && icmph->type != ICMP_SOURCE_QUENCH | 208 | icmph->type != ICMP_SOURCE_QUENCH && |
206 | && icmph->type != ICMP_TIME_EXCEEDED | 209 | icmph->type != ICMP_TIME_EXCEEDED && |
207 | && icmph->type != ICMP_PARAMETERPROB | 210 | icmph->type != ICMP_PARAMETERPROB && |
208 | && icmph->type != ICMP_REDIRECT) | 211 | icmph->type != ICMP_REDIRECT) |
209 | return NF_ACCEPT; | 212 | return NF_ACCEPT; |
210 | 213 | ||
211 | return icmp_error_message(net, skb, ctinfo, hooknum); | 214 | return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); |
212 | } | 215 | } |
213 | 216 | ||
214 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 217 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
@@ -238,17 +241,17 @@ static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { | |||
238 | static int icmp_nlattr_to_tuple(struct nlattr *tb[], | 241 | static int icmp_nlattr_to_tuple(struct nlattr *tb[], |
239 | struct nf_conntrack_tuple *tuple) | 242 | struct nf_conntrack_tuple *tuple) |
240 | { | 243 | { |
241 | if (!tb[CTA_PROTO_ICMP_TYPE] | 244 | if (!tb[CTA_PROTO_ICMP_TYPE] || |
242 | || !tb[CTA_PROTO_ICMP_CODE] | 245 | !tb[CTA_PROTO_ICMP_CODE] || |
243 | || !tb[CTA_PROTO_ICMP_ID]) | 246 | !tb[CTA_PROTO_ICMP_ID]) |
244 | return -EINVAL; | 247 | return -EINVAL; |
245 | 248 | ||
246 | tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); | 249 | tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); |
247 | tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); | 250 | tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); |
248 | tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); | 251 | tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); |
249 | 252 | ||
250 | if (tuple->dst.u.icmp.type >= sizeof(invmap) | 253 | if (tuple->dst.u.icmp.type >= sizeof(invmap) || |
251 | || !invmap[tuple->dst.u.icmp.type]) | 254 | !invmap[tuple->dst.u.icmp.type]) |
252 | return -EINVAL; | 255 | return -EINVAL; |
253 | 256 | ||
254 | return 0; | 257 | return 0; |
@@ -270,9 +273,7 @@ static struct ctl_table icmp_sysctl_table[] = { | |||
270 | .mode = 0644, | 273 | .mode = 0644, |
271 | .proc_handler = proc_dointvec_jiffies, | 274 | .proc_handler = proc_dointvec_jiffies, |
272 | }, | 275 | }, |
273 | { | 276 | { } |
274 | .ctl_name = 0 | ||
275 | } | ||
276 | }; | 277 | }; |
277 | #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT | 278 | #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT |
278 | static struct ctl_table icmp_compat_sysctl_table[] = { | 279 | static struct ctl_table icmp_compat_sysctl_table[] = { |
@@ -283,9 +284,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { | |||
283 | .mode = 0644, | 284 | .mode = 0644, |
284 | .proc_handler = proc_dointvec_jiffies, | 285 | .proc_handler = proc_dointvec_jiffies, |
285 | }, | 286 | }, |
286 | { | 287 | { } |
287 | .ctl_name = 0 | ||
288 | } | ||
289 | }; | 288 | }; |
290 | #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ | 289 | #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ |
291 | #endif /* CONFIG_SYSCTL */ | 290 | #endif /* CONFIG_SYSCTL */ |
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index fa2d6b6fc3e5..cb763ae9ed90 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -14,8 +14,13 @@ | |||
14 | #include <net/route.h> | 14 | #include <net/route.h> |
15 | #include <net/ip.h> | 15 | #include <net/ip.h> |
16 | 16 | ||
17 | #include <linux/netfilter_bridge.h> | ||
17 | #include <linux/netfilter_ipv4.h> | 18 | #include <linux/netfilter_ipv4.h> |
18 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | 19 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> |
20 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
21 | #include <net/netfilter/nf_conntrack.h> | ||
22 | #endif | ||
23 | #include <net/netfilter/nf_conntrack_zones.h> | ||
19 | 24 | ||
20 | /* Returns new sk_buff, or NULL */ | 25 | /* Returns new sk_buff, or NULL */ |
21 | static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | 26 | static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) |
@@ -34,6 +39,27 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
34 | return err; | 39 | return err; |
35 | } | 40 | } |
36 | 41 | ||
42 | static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, | ||
43 | struct sk_buff *skb) | ||
44 | { | ||
45 | u16 zone = NF_CT_DEFAULT_ZONE; | ||
46 | |||
47 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
48 | if (skb->nfct) | ||
49 | zone = nf_ct_zone((struct nf_conn *)skb->nfct); | ||
50 | #endif | ||
51 | |||
52 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
53 | if (skb->nf_bridge && | ||
54 | skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) | ||
55 | return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; | ||
56 | #endif | ||
57 | if (hooknum == NF_INET_PRE_ROUTING) | ||
58 | return IP_DEFRAG_CONNTRACK_IN + zone; | ||
59 | else | ||
60 | return IP_DEFRAG_CONNTRACK_OUT + zone; | ||
61 | } | ||
62 | |||
37 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | 63 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, |
38 | struct sk_buff *skb, | 64 | struct sk_buff *skb, |
39 | const struct net_device *in, | 65 | const struct net_device *in, |
@@ -44,16 +70,14 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | |||
44 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) | 70 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) |
45 | /* Previously seen (loopback)? Ignore. Do this before | 71 | /* Previously seen (loopback)? Ignore. Do this before |
46 | fragment check. */ | 72 | fragment check. */ |
47 | if (skb->nfct) | 73 | if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) |
48 | return NF_ACCEPT; | 74 | return NF_ACCEPT; |
49 | #endif | 75 | #endif |
50 | #endif | 76 | #endif |
51 | /* Gather fragments. */ | 77 | /* Gather fragments. */ |
52 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 78 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
53 | if (nf_ct_ipv4_gather_frags(skb, | 79 | enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); |
54 | hooknum == NF_INET_PRE_ROUTING ? | 80 | if (nf_ct_ipv4_gather_frags(skb, user)) |
55 | IP_DEFRAG_CONNTRACK_IN : | ||
56 | IP_DEFRAG_CONNTRACK_OUT)) | ||
57 | return NF_STOLEN; | 81 | return NF_STOLEN; |
58 | } | 82 | } |
59 | return NF_ACCEPT; | 83 | return NF_ACCEPT; |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..4f8bddb760c9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/timer.h> | 13 | #include <linux/timer.h> |
14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
15 | #include <linux/gfp.h> | ||
15 | #include <net/checksum.h> | 16 | #include <net/checksum.h> |
16 | #include <net/icmp.h> | 17 | #include <net/icmp.h> |
17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
@@ -30,14 +31,12 @@ | |||
30 | #include <net/netfilter/nf_conntrack_helper.h> | 31 | #include <net/netfilter/nf_conntrack_helper.h> |
31 | #include <net/netfilter/nf_conntrack_l3proto.h> | 32 | #include <net/netfilter/nf_conntrack_l3proto.h> |
32 | #include <net/netfilter/nf_conntrack_l4proto.h> | 33 | #include <net/netfilter/nf_conntrack_l4proto.h> |
34 | #include <net/netfilter/nf_conntrack_zones.h> | ||
33 | 35 | ||
34 | static DEFINE_SPINLOCK(nf_nat_lock); | 36 | static DEFINE_SPINLOCK(nf_nat_lock); |
35 | 37 | ||
36 | static struct nf_conntrack_l3proto *l3proto __read_mostly; | 38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; |
37 | 39 | ||
38 | /* Calculated at init based on memory size */ | ||
39 | static unsigned int nf_nat_htable_size __read_mostly; | ||
40 | |||
41 | #define MAX_IP_NAT_PROTO 256 | 40 | #define MAX_IP_NAT_PROTO 256 |
42 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] | 41 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] |
43 | __read_mostly; | 42 | __read_mostly; |
@@ -72,15 +71,16 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); | |||
72 | 71 | ||
73 | /* We keep an extra hash for each conntrack, for fast searching. */ | 72 | /* We keep an extra hash for each conntrack, for fast searching. */ |
74 | static inline unsigned int | 73 | static inline unsigned int |
75 | hash_by_src(const struct nf_conntrack_tuple *tuple) | 74 | hash_by_src(const struct net *net, u16 zone, |
75 | const struct nf_conntrack_tuple *tuple) | ||
76 | { | 76 | { |
77 | unsigned int hash; | 77 | unsigned int hash; |
78 | 78 | ||
79 | /* Original src, to ensure we map it consistently if poss. */ | 79 | /* Original src, to ensure we map it consistently if poss. */ |
80 | hash = jhash_3words((__force u32)tuple->src.u3.ip, | 80 | hash = jhash_3words((__force u32)tuple->src.u3.ip, |
81 | (__force u32)tuple->src.u.all, | 81 | (__force u32)tuple->src.u.all ^ zone, |
82 | tuple->dst.protonum, 0); | 82 | tuple->dst.protonum, 0); |
83 | return ((u64)hash * nf_nat_htable_size) >> 32; | 83 | return ((u64)hash * net->ipv4.nat_htable_size) >> 32; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* Is this tuple already taken? (not by us) */ | 86 | /* Is this tuple already taken? (not by us) */ |
@@ -142,12 +142,12 @@ same_src(const struct nf_conn *ct, | |||
142 | 142 | ||
143 | /* Only called for SRC manip */ | 143 | /* Only called for SRC manip */ |
144 | static int | 144 | static int |
145 | find_appropriate_src(struct net *net, | 145 | find_appropriate_src(struct net *net, u16 zone, |
146 | const struct nf_conntrack_tuple *tuple, | 146 | const struct nf_conntrack_tuple *tuple, |
147 | struct nf_conntrack_tuple *result, | 147 | struct nf_conntrack_tuple *result, |
148 | const struct nf_nat_range *range) | 148 | const struct nf_nat_range *range) |
149 | { | 149 | { |
150 | unsigned int h = hash_by_src(tuple); | 150 | unsigned int h = hash_by_src(net, zone, tuple); |
151 | const struct nf_conn_nat *nat; | 151 | const struct nf_conn_nat *nat; |
152 | const struct nf_conn *ct; | 152 | const struct nf_conn *ct; |
153 | const struct hlist_node *n; | 153 | const struct hlist_node *n; |
@@ -155,7 +155,7 @@ find_appropriate_src(struct net *net, | |||
155 | rcu_read_lock(); | 155 | rcu_read_lock(); |
156 | hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { | 156 | hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { |
157 | ct = nat->ct; | 157 | ct = nat->ct; |
158 | if (same_src(ct, tuple)) { | 158 | if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { |
159 | /* Copy source part from reply tuple. */ | 159 | /* Copy source part from reply tuple. */ |
160 | nf_ct_invert_tuplepr(result, | 160 | nf_ct_invert_tuplepr(result, |
161 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | 161 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); |
@@ -178,7 +178,7 @@ find_appropriate_src(struct net *net, | |||
178 | the ip with the lowest src-ip/dst-ip/proto usage. | 178 | the ip with the lowest src-ip/dst-ip/proto usage. |
179 | */ | 179 | */ |
180 | static void | 180 | static void |
181 | find_best_ips_proto(struct nf_conntrack_tuple *tuple, | 181 | find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, |
182 | const struct nf_nat_range *range, | 182 | const struct nf_nat_range *range, |
183 | const struct nf_conn *ct, | 183 | const struct nf_conn *ct, |
184 | enum nf_nat_manip_type maniptype) | 184 | enum nf_nat_manip_type maniptype) |
@@ -212,7 +212,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, | |||
212 | maxip = ntohl(range->max_ip); | 212 | maxip = ntohl(range->max_ip); |
213 | j = jhash_2words((__force u32)tuple->src.u3.ip, | 213 | j = jhash_2words((__force u32)tuple->src.u3.ip, |
214 | range->flags & IP_NAT_RANGE_PERSISTENT ? | 214 | range->flags & IP_NAT_RANGE_PERSISTENT ? |
215 | 0 : (__force u32)tuple->dst.u3.ip, 0); | 215 | 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); |
216 | j = ((u64)j * (maxip - minip + 1)) >> 32; | 216 | j = ((u64)j * (maxip - minip + 1)) >> 32; |
217 | *var_ipp = htonl(minip + j); | 217 | *var_ipp = htonl(minip + j); |
218 | } | 218 | } |
@@ -232,6 +232,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
232 | { | 232 | { |
233 | struct net *net = nf_ct_net(ct); | 233 | struct net *net = nf_ct_net(ct); |
234 | const struct nf_nat_protocol *proto; | 234 | const struct nf_nat_protocol *proto; |
235 | u16 zone = nf_ct_zone(ct); | ||
235 | 236 | ||
236 | /* 1) If this srcip/proto/src-proto-part is currently mapped, | 237 | /* 1) If this srcip/proto/src-proto-part is currently mapped, |
237 | and that same mapping gives a unique tuple within the given | 238 | and that same mapping gives a unique tuple within the given |
@@ -242,7 +243,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
242 | manips not an issue. */ | 243 | manips not an issue. */ |
243 | if (maniptype == IP_NAT_MANIP_SRC && | 244 | if (maniptype == IP_NAT_MANIP_SRC && |
244 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 245 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
245 | if (find_appropriate_src(net, orig_tuple, tuple, range)) { | 246 | if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { |
246 | pr_debug("get_unique_tuple: Found current src map\n"); | 247 | pr_debug("get_unique_tuple: Found current src map\n"); |
247 | if (!nf_nat_used_tuple(tuple, ct)) | 248 | if (!nf_nat_used_tuple(tuple, ct)) |
248 | return; | 249 | return; |
@@ -252,7 +253,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
252 | /* 2) Select the least-used IP/proto combination in the given | 253 | /* 2) Select the least-used IP/proto combination in the given |
253 | range. */ | 254 | range. */ |
254 | *tuple = *orig_tuple; | 255 | *tuple = *orig_tuple; |
255 | find_best_ips_proto(tuple, range, ct, maniptype); | 256 | find_best_ips_proto(zone, tuple, range, ct, maniptype); |
256 | 257 | ||
257 | /* 3) The per-protocol part of the manip is made to map into | 258 | /* 3) The per-protocol part of the manip is made to map into |
258 | the range to make a unique tuple. */ | 259 | the range to make a unique tuple. */ |
@@ -330,7 +331,8 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
330 | if (have_to_hash) { | 331 | if (have_to_hash) { |
331 | unsigned int srchash; | 332 | unsigned int srchash; |
332 | 333 | ||
333 | srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 334 | srchash = hash_by_src(net, nf_ct_zone(ct), |
335 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
334 | spin_lock_bh(&nf_nat_lock); | 336 | spin_lock_bh(&nf_nat_lock); |
335 | /* nf_conntrack_alter_reply might re-allocate exntension aera */ | 337 | /* nf_conntrack_alter_reply might re-allocate exntension aera */ |
336 | nat = nfct_nat(ct); | 338 | nat = nfct_nat(ct); |
@@ -679,8 +681,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, | |||
679 | 681 | ||
680 | static int __net_init nf_nat_net_init(struct net *net) | 682 | static int __net_init nf_nat_net_init(struct net *net) |
681 | { | 683 | { |
682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, | 684 | /* Leave them the same for the moment. */ |
683 | &net->ipv4.nat_vmalloced, 0); | 685 | net->ipv4.nat_htable_size = net->ct.htable_size; |
686 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | ||
687 | &net->ipv4.nat_vmalloced, 0); | ||
684 | if (!net->ipv4.nat_bysource) | 688 | if (!net->ipv4.nat_bysource) |
685 | return -ENOMEM; | 689 | return -ENOMEM; |
686 | return 0; | 690 | return 0; |
@@ -703,7 +707,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 707 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
704 | synchronize_rcu(); | 708 | synchronize_rcu(); |
705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 709 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, |
706 | nf_nat_htable_size); | 710 | net->ipv4.nat_htable_size); |
707 | } | 711 | } |
708 | 712 | ||
709 | static struct pernet_operations nf_nat_net_ops = { | 713 | static struct pernet_operations nf_nat_net_ops = { |
@@ -724,9 +728,6 @@ static int __init nf_nat_init(void) | |||
724 | return ret; | 728 | return ret; |
725 | } | 729 | } |
726 | 730 | ||
727 | /* Leave them the same for the moment. */ | ||
728 | nf_nat_htable_size = nf_conntrack_htable_size; | ||
729 | |||
730 | ret = register_pernet_subsys(&nf_nat_net_ops); | 731 | ret = register_pernet_subsys(&nf_nat_net_ops); |
731 | if (ret < 0) | 732 | if (ret < 0) |
732 | goto cleanup_extend; | 733 | goto cleanup_extend; |
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index a1d5d58a58bf..86e0e84ff0a0 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c | |||
@@ -27,76 +27,29 @@ MODULE_ALIAS("ip_nat_ftp"); | |||
27 | 27 | ||
28 | /* FIXME: Time out? --RR */ | 28 | /* FIXME: Time out? --RR */ |
29 | 29 | ||
30 | static int | 30 | static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, |
31 | mangle_rfc959_packet(struct sk_buff *skb, | 31 | char *buffer, size_t buflen, |
32 | __be32 newip, | 32 | __be32 addr, u16 port) |
33 | u_int16_t port, | ||
34 | unsigned int matchoff, | ||
35 | unsigned int matchlen, | ||
36 | struct nf_conn *ct, | ||
37 | enum ip_conntrack_info ctinfo) | ||
38 | { | 33 | { |
39 | char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; | 34 | switch (type) { |
40 | 35 | case NF_CT_FTP_PORT: | |
41 | sprintf(buffer, "%u,%u,%u,%u,%u,%u", | 36 | case NF_CT_FTP_PASV: |
42 | NIPQUAD(newip), port>>8, port&0xFF); | 37 | return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", |
43 | 38 | ((unsigned char *)&addr)[0], | |
44 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | 39 | ((unsigned char *)&addr)[1], |
45 | 40 | ((unsigned char *)&addr)[2], | |
46 | return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | 41 | ((unsigned char *)&addr)[3], |
47 | matchlen, buffer, strlen(buffer)); | 42 | port >> 8, |
48 | } | 43 | port & 0xFF); |
49 | 44 | case NF_CT_FTP_EPRT: | |
50 | /* |1|132.235.1.2|6275| */ | 45 | return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); |
51 | static int | 46 | case NF_CT_FTP_EPSV: |
52 | mangle_eprt_packet(struct sk_buff *skb, | 47 | return snprintf(buffer, buflen, "|||%u|", port); |
53 | __be32 newip, | 48 | } |
54 | u_int16_t port, | ||
55 | unsigned int matchoff, | ||
56 | unsigned int matchlen, | ||
57 | struct nf_conn *ct, | ||
58 | enum ip_conntrack_info ctinfo) | ||
59 | { | ||
60 | char buffer[sizeof("|1|255.255.255.255|65535|")]; | ||
61 | |||
62 | sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); | ||
63 | |||
64 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
65 | |||
66 | return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | ||
67 | matchlen, buffer, strlen(buffer)); | ||
68 | } | ||
69 | |||
70 | /* |1|132.235.1.2|6275| */ | ||
71 | static int | ||
72 | mangle_epsv_packet(struct sk_buff *skb, | ||
73 | __be32 newip, | ||
74 | u_int16_t port, | ||
75 | unsigned int matchoff, | ||
76 | unsigned int matchlen, | ||
77 | struct nf_conn *ct, | ||
78 | enum ip_conntrack_info ctinfo) | ||
79 | { | ||
80 | char buffer[sizeof("|||65535|")]; | ||
81 | |||
82 | sprintf(buffer, "|||%u|", port); | ||
83 | |||
84 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
85 | 49 | ||
86 | return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | 50 | return 0; |
87 | matchlen, buffer, strlen(buffer)); | ||
88 | } | 51 | } |
89 | 52 | ||
90 | static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, | ||
91 | unsigned int, unsigned int, struct nf_conn *, | ||
92 | enum ip_conntrack_info) | ||
93 | = { | ||
94 | [NF_CT_FTP_PORT] = mangle_rfc959_packet, | ||
95 | [NF_CT_FTP_PASV] = mangle_rfc959_packet, | ||
96 | [NF_CT_FTP_EPRT] = mangle_eprt_packet, | ||
97 | [NF_CT_FTP_EPSV] = mangle_epsv_packet | ||
98 | }; | ||
99 | |||
100 | /* So, this packet has hit the connection tracking matching code. | 53 | /* So, this packet has hit the connection tracking matching code. |
101 | Mangle it, and change the expectation to match the new version. */ | 54 | Mangle it, and change the expectation to match the new version. */ |
102 | static unsigned int nf_nat_ftp(struct sk_buff *skb, | 55 | static unsigned int nf_nat_ftp(struct sk_buff *skb, |
@@ -110,6 +63,8 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, | |||
110 | u_int16_t port; | 63 | u_int16_t port; |
111 | int dir = CTINFO2DIR(ctinfo); | 64 | int dir = CTINFO2DIR(ctinfo); |
112 | struct nf_conn *ct = exp->master; | 65 | struct nf_conn *ct = exp->master; |
66 | char buffer[sizeof("|1|255.255.255.255|65535|")]; | ||
67 | unsigned int buflen; | ||
113 | 68 | ||
114 | pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); | 69 | pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); |
115 | 70 | ||
@@ -132,11 +87,21 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, | |||
132 | if (port == 0) | 87 | if (port == 0) |
133 | return NF_DROP; | 88 | return NF_DROP; |
134 | 89 | ||
135 | if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { | 90 | buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); |
136 | nf_ct_unexpect_related(exp); | 91 | if (!buflen) |
137 | return NF_DROP; | 92 | goto out; |
138 | } | 93 | |
94 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
95 | |||
96 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | ||
97 | matchlen, buffer, buflen)) | ||
98 | goto out; | ||
99 | |||
139 | return NF_ACCEPT; | 100 | return NF_ACCEPT; |
101 | |||
102 | out: | ||
103 | nf_ct_unexpect_related(exp); | ||
104 | return NF_DROP; | ||
140 | } | 105 | } |
141 | 106 | ||
142 | static void __exit nf_nat_ftp_fini(void) | 107 | static void __exit nf_nat_ftp_fini(void) |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index f9520fa3aba9..4a0c6b548eee 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/gfp.h> | ||
11 | #include <linux/kmod.h> | 12 | #include <linux/kmod.h> |
12 | #include <linux/types.h> | 13 | #include <linux/types.h> |
13 | #include <linux/timer.h> | 14 | #include <linux/timer.h> |
@@ -41,18 +42,14 @@ adjust_tcp_sequence(u32 seq, | |||
41 | struct nf_conn *ct, | 42 | struct nf_conn *ct, |
42 | enum ip_conntrack_info ctinfo) | 43 | enum ip_conntrack_info ctinfo) |
43 | { | 44 | { |
44 | int dir; | 45 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
45 | struct nf_nat_seq *this_way, *other_way; | ||
46 | struct nf_conn_nat *nat = nfct_nat(ct); | 46 | struct nf_conn_nat *nat = nfct_nat(ct); |
47 | struct nf_nat_seq *this_way = &nat->seq[dir]; | ||
47 | 48 | ||
48 | pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq); | 49 | pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", |
49 | 50 | seq, sizediff); | |
50 | dir = CTINFO2DIR(ctinfo); | ||
51 | |||
52 | this_way = &nat->seq[dir]; | ||
53 | other_way = &nat->seq[!dir]; | ||
54 | 51 | ||
55 | pr_debug("nf_nat_resize_packet: Seq_offset before: "); | 52 | pr_debug("adjust_tcp_sequence: Seq_offset before: "); |
56 | DUMP_OFFSET(this_way); | 53 | DUMP_OFFSET(this_way); |
57 | 54 | ||
58 | spin_lock_bh(&nf_nat_seqofs_lock); | 55 | spin_lock_bh(&nf_nat_seqofs_lock); |
@@ -63,13 +60,13 @@ adjust_tcp_sequence(u32 seq, | |||
63 | * retransmit */ | 60 | * retransmit */ |
64 | if (this_way->offset_before == this_way->offset_after || | 61 | if (this_way->offset_before == this_way->offset_after || |
65 | before(this_way->correction_pos, seq)) { | 62 | before(this_way->correction_pos, seq)) { |
66 | this_way->correction_pos = seq; | 63 | this_way->correction_pos = seq; |
67 | this_way->offset_before = this_way->offset_after; | 64 | this_way->offset_before = this_way->offset_after; |
68 | this_way->offset_after += sizediff; | 65 | this_way->offset_after += sizediff; |
69 | } | 66 | } |
70 | spin_unlock_bh(&nf_nat_seqofs_lock); | 67 | spin_unlock_bh(&nf_nat_seqofs_lock); |
71 | 68 | ||
72 | pr_debug("nf_nat_resize_packet: Seq_offset after: "); | 69 | pr_debug("adjust_tcp_sequence: Seq_offset after: "); |
73 | DUMP_OFFSET(this_way); | 70 | DUMP_OFFSET(this_way); |
74 | } | 71 | } |
75 | 72 | ||
@@ -145,6 +142,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) | |||
145 | return 1; | 142 | return 1; |
146 | } | 143 | } |
147 | 144 | ||
145 | void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, | ||
146 | __be32 seq, s16 off) | ||
147 | { | ||
148 | if (!off) | ||
149 | return; | ||
150 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); | ||
151 | adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); | ||
152 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); | ||
153 | } | ||
154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); | ||
155 | |||
148 | /* Generic function for mangling variable-length address changes inside | 156 | /* Generic function for mangling variable-length address changes inside |
149 | * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX | 157 | * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX |
150 | * command in FTP). | 158 | * command in FTP). |
@@ -153,14 +161,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) | |||
153 | * skb enlargement, ... | 161 | * skb enlargement, ... |
154 | * | 162 | * |
155 | * */ | 163 | * */ |
156 | int | 164 | int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, |
157 | nf_nat_mangle_tcp_packet(struct sk_buff *skb, | 165 | struct nf_conn *ct, |
158 | struct nf_conn *ct, | 166 | enum ip_conntrack_info ctinfo, |
159 | enum ip_conntrack_info ctinfo, | 167 | unsigned int match_offset, |
160 | unsigned int match_offset, | 168 | unsigned int match_len, |
161 | unsigned int match_len, | 169 | const char *rep_buffer, |
162 | const char *rep_buffer, | 170 | unsigned int rep_len, bool adjust) |
163 | unsigned int rep_len) | ||
164 | { | 171 | { |
165 | struct rtable *rt = skb_rtable(skb); | 172 | struct rtable *rt = skb_rtable(skb); |
166 | struct iphdr *iph; | 173 | struct iphdr *iph; |
@@ -206,16 +213,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, | |||
206 | inet_proto_csum_replace2(&tcph->check, skb, | 213 | inet_proto_csum_replace2(&tcph->check, skb, |
207 | htons(oldlen), htons(datalen), 1); | 214 | htons(oldlen), htons(datalen), 1); |
208 | 215 | ||
209 | if (rep_len != match_len) { | 216 | if (adjust && rep_len != match_len) |
210 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); | 217 | nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, |
211 | adjust_tcp_sequence(ntohl(tcph->seq), | 218 | (int)rep_len - (int)match_len); |
212 | (int)rep_len - (int)match_len, | 219 | |
213 | ct, ctinfo); | ||
214 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); | ||
215 | } | ||
216 | return 1; | 220 | return 1; |
217 | } | 221 | } |
218 | EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); | 222 | EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); |
219 | 223 | ||
220 | /* Generic function for mangling variable-length address changes inside | 224 | /* Generic function for mangling variable-length address changes inside |
221 | * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX | 225 | * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX |
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb171056c63..4c060038d29f 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <net/netfilter/nf_nat_rule.h> | 25 | #include <net/netfilter/nf_nat_rule.h> |
26 | #include <net/netfilter/nf_conntrack_helper.h> | 26 | #include <net/netfilter/nf_conntrack_helper.h> |
27 | #include <net/netfilter/nf_conntrack_expect.h> | 27 | #include <net/netfilter/nf_conntrack_expect.h> |
28 | #include <net/netfilter/nf_conntrack_zones.h> | ||
28 | #include <linux/netfilter/nf_conntrack_proto_gre.h> | 29 | #include <linux/netfilter/nf_conntrack_proto_gre.h> |
29 | #include <linux/netfilter/nf_conntrack_pptp.h> | 30 | #include <linux/netfilter/nf_conntrack_pptp.h> |
30 | 31 | ||
@@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
74 | 75 | ||
75 | pr_debug("trying to unexpect other dir: "); | 76 | pr_debug("trying to unexpect other dir: "); |
76 | nf_ct_dump_tuple_ip(&t); | 77 | nf_ct_dump_tuple_ip(&t); |
77 | other_exp = nf_ct_expect_find_get(net, &t); | 78 | other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); |
78 | if (other_exp) { | 79 | if (other_exp) { |
79 | nf_ct_unexpect_related(other_exp); | 80 | nf_ct_unexpect_related(other_exp); |
80 | nf_ct_expect_put(other_exp); | 81 | nf_ct_expect_put(other_exp); |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0dfb4ec..26de2c1f7fab 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/kmod.h> | 15 | #include <linux/kmod.h> |
16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
17 | #include <linux/proc_fs.h> | 17 | #include <linux/proc_fs.h> |
18 | #include <linux/slab.h> | ||
18 | #include <net/checksum.h> | 19 | #include <net/checksum.h> |
19 | #include <net/route.h> | 20 | #include <net/route.h> |
20 | #include <linux/bitops.h> | 21 | #include <linux/bitops.h> |
@@ -28,36 +29,6 @@ | |||
28 | (1 << NF_INET_POST_ROUTING) | \ | 29 | (1 << NF_INET_POST_ROUTING) | \ |
29 | (1 << NF_INET_LOCAL_OUT)) | 30 | (1 << NF_INET_LOCAL_OUT)) |
30 | 31 | ||
31 | static const struct | ||
32 | { | ||
33 | struct ipt_replace repl; | ||
34 | struct ipt_standard entries[3]; | ||
35 | struct ipt_error term; | ||
36 | } nat_initial_table __net_initdata = { | ||
37 | .repl = { | ||
38 | .name = "nat", | ||
39 | .valid_hooks = NAT_VALID_HOOKS, | ||
40 | .num_entries = 4, | ||
41 | .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
42 | .hook_entry = { | ||
43 | [NF_INET_PRE_ROUTING] = 0, | ||
44 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), | ||
45 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 | ||
46 | }, | ||
47 | .underflow = { | ||
48 | [NF_INET_PRE_ROUTING] = 0, | ||
49 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), | ||
50 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 | ||
51 | }, | ||
52 | }, | ||
53 | .entries = { | ||
54 | IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ | ||
55 | IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ | ||
56 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
57 | }, | ||
58 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
59 | }; | ||
60 | |||
61 | static const struct xt_table nat_table = { | 32 | static const struct xt_table nat_table = { |
62 | .name = "nat", | 33 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 34 | .valid_hooks = NAT_VALID_HOOKS, |
@@ -186,8 +157,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = { | |||
186 | 157 | ||
187 | static int __net_init nf_nat_rule_net_init(struct net *net) | 158 | static int __net_init nf_nat_rule_net_init(struct net *net) |
188 | { | 159 | { |
189 | net->ipv4.nat_table = ipt_register_table(net, &nat_table, | 160 | struct ipt_replace *repl; |
190 | &nat_initial_table.repl); | 161 | |
162 | repl = ipt_alloc_initial_table(&nat_table); | ||
163 | if (repl == NULL) | ||
164 | return -ENOMEM; | ||
165 | net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); | ||
166 | kfree(repl); | ||
191 | if (IS_ERR(net->ipv4.nat_table)) | 167 | if (IS_ERR(net->ipv4.nat_table)) |
192 | return PTR_ERR(net->ipv4.nat_table); | 168 | return PTR_ERR(net->ipv4.nat_table); |
193 | return 0; | 169 | return 0; |
@@ -195,7 +171,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) | |||
195 | 171 | ||
196 | static void __net_exit nf_nat_rule_net_exit(struct net *net) | 172 | static void __net_exit nf_nat_rule_net_exit(struct net *net) |
197 | { | 173 | { |
198 | ipt_unregister_table(net->ipv4.nat_table); | 174 | ipt_unregister_table(net, net->ipv4.nat_table); |
199 | } | 175 | } |
200 | 176 | ||
201 | static struct pernet_operations nf_nat_rule_net_ops = { | 177 | static struct pernet_operations nf_nat_rule_net_ops = { |
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 07d61a57613c..11b538deaaec 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* SIP extension for UDP NAT alteration. | 1 | /* SIP extension for NAT alteration. |
2 | * | 2 | * |
3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> | 3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> |
4 | * based on RR's ip_nat_ftp.c and other modules. | 4 | * based on RR's ip_nat_ftp.c and other modules. |
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/ip.h> | 15 | #include <linux/ip.h> |
16 | #include <net/ip.h> | 16 | #include <net/ip.h> |
17 | #include <linux/udp.h> | 17 | #include <linux/udp.h> |
18 | #include <linux/tcp.h> | ||
18 | 19 | ||
19 | #include <net/netfilter/nf_nat.h> | 20 | #include <net/netfilter/nf_nat.h> |
20 | #include <net/netfilter/nf_nat_helper.h> | 21 | #include <net/netfilter/nf_nat_helper.h> |
@@ -29,25 +30,42 @@ MODULE_DESCRIPTION("SIP NAT helper"); | |||
29 | MODULE_ALIAS("ip_nat_sip"); | 30 | MODULE_ALIAS("ip_nat_sip"); |
30 | 31 | ||
31 | 32 | ||
32 | static unsigned int mangle_packet(struct sk_buff *skb, | 33 | static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, |
33 | const char **dptr, unsigned int *datalen, | 34 | const char **dptr, unsigned int *datalen, |
34 | unsigned int matchoff, unsigned int matchlen, | 35 | unsigned int matchoff, unsigned int matchlen, |
35 | const char *buffer, unsigned int buflen) | 36 | const char *buffer, unsigned int buflen) |
36 | { | 37 | { |
37 | enum ip_conntrack_info ctinfo; | 38 | enum ip_conntrack_info ctinfo; |
38 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | 39 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
39 | 40 | struct tcphdr *th; | |
40 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, | 41 | unsigned int baseoff; |
41 | buffer, buflen)) | 42 | |
42 | return 0; | 43 | if (nf_ct_protonum(ct) == IPPROTO_TCP) { |
44 | th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); | ||
45 | baseoff = ip_hdrlen(skb) + th->doff * 4; | ||
46 | matchoff += dataoff - baseoff; | ||
47 | |||
48 | if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | ||
49 | matchoff, matchlen, | ||
50 | buffer, buflen, false)) | ||
51 | return 0; | ||
52 | } else { | ||
53 | baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); | ||
54 | matchoff += dataoff - baseoff; | ||
55 | |||
56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | ||
57 | matchoff, matchlen, | ||
58 | buffer, buflen)) | ||
59 | return 0; | ||
60 | } | ||
43 | 61 | ||
44 | /* Reload data pointer and adjust datalen value */ | 62 | /* Reload data pointer and adjust datalen value */ |
45 | *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); | 63 | *dptr = skb->data + dataoff; |
46 | *datalen += buflen - matchlen; | 64 | *datalen += buflen - matchlen; |
47 | return 1; | 65 | return 1; |
48 | } | 66 | } |
49 | 67 | ||
50 | static int map_addr(struct sk_buff *skb, | 68 | static int map_addr(struct sk_buff *skb, unsigned int dataoff, |
51 | const char **dptr, unsigned int *datalen, | 69 | const char **dptr, unsigned int *datalen, |
52 | unsigned int matchoff, unsigned int matchlen, | 70 | unsigned int matchoff, unsigned int matchlen, |
53 | union nf_inet_addr *addr, __be16 port) | 71 | union nf_inet_addr *addr, __be16 port) |
@@ -76,11 +94,11 @@ static int map_addr(struct sk_buff *skb, | |||
76 | 94 | ||
77 | buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); | 95 | buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); |
78 | 96 | ||
79 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 97 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
80 | buffer, buflen); | 98 | buffer, buflen); |
81 | } | 99 | } |
82 | 100 | ||
83 | static int map_sip_addr(struct sk_buff *skb, | 101 | static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, |
84 | const char **dptr, unsigned int *datalen, | 102 | const char **dptr, unsigned int *datalen, |
85 | enum sip_header_types type) | 103 | enum sip_header_types type) |
86 | { | 104 | { |
@@ -93,16 +111,18 @@ static int map_sip_addr(struct sk_buff *skb, | |||
93 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, | 111 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, |
94 | &matchoff, &matchlen, &addr, &port) <= 0) | 112 | &matchoff, &matchlen, &addr, &port) <= 0) |
95 | return 1; | 113 | return 1; |
96 | return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); | 114 | return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
115 | &addr, port); | ||
97 | } | 116 | } |
98 | 117 | ||
99 | static unsigned int ip_nat_sip(struct sk_buff *skb, | 118 | static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, |
100 | const char **dptr, unsigned int *datalen) | 119 | const char **dptr, unsigned int *datalen) |
101 | { | 120 | { |
102 | enum ip_conntrack_info ctinfo; | 121 | enum ip_conntrack_info ctinfo; |
103 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | 122 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
104 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 123 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
105 | unsigned int dataoff, matchoff, matchlen; | 124 | unsigned int coff, matchoff, matchlen; |
125 | enum sip_header_types hdr; | ||
106 | union nf_inet_addr addr; | 126 | union nf_inet_addr addr; |
107 | __be16 port; | 127 | __be16 port; |
108 | int request, in_header; | 128 | int request, in_header; |
@@ -112,16 +132,21 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
112 | if (ct_sip_parse_request(ct, *dptr, *datalen, | 132 | if (ct_sip_parse_request(ct, *dptr, *datalen, |
113 | &matchoff, &matchlen, | 133 | &matchoff, &matchlen, |
114 | &addr, &port) > 0 && | 134 | &addr, &port) > 0 && |
115 | !map_addr(skb, dptr, datalen, matchoff, matchlen, | 135 | !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
116 | &addr, port)) | 136 | &addr, port)) |
117 | return NF_DROP; | 137 | return NF_DROP; |
118 | request = 1; | 138 | request = 1; |
119 | } else | 139 | } else |
120 | request = 0; | 140 | request = 0; |
121 | 141 | ||
142 | if (nf_ct_protonum(ct) == IPPROTO_TCP) | ||
143 | hdr = SIP_HDR_VIA_TCP; | ||
144 | else | ||
145 | hdr = SIP_HDR_VIA_UDP; | ||
146 | |||
122 | /* Translate topmost Via header and parameters */ | 147 | /* Translate topmost Via header and parameters */ |
123 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, | 148 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, |
124 | SIP_HDR_VIA, NULL, &matchoff, &matchlen, | 149 | hdr, NULL, &matchoff, &matchlen, |
125 | &addr, &port) > 0) { | 150 | &addr, &port) > 0) { |
126 | unsigned int matchend, poff, plen, buflen, n; | 151 | unsigned int matchend, poff, plen, buflen, n; |
127 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | 152 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
@@ -138,7 +163,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
138 | goto next; | 163 | goto next; |
139 | } | 164 | } |
140 | 165 | ||
141 | if (!map_addr(skb, dptr, datalen, matchoff, matchlen, | 166 | if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
142 | &addr, port)) | 167 | &addr, port)) |
143 | return NF_DROP; | 168 | return NF_DROP; |
144 | 169 | ||
@@ -153,8 +178,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
153 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { | 178 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { |
154 | buflen = sprintf(buffer, "%pI4", | 179 | buflen = sprintf(buffer, "%pI4", |
155 | &ct->tuplehash[!dir].tuple.dst.u3.ip); | 180 | &ct->tuplehash[!dir].tuple.dst.u3.ip); |
156 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 181 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
157 | buffer, buflen)) | 182 | poff, plen, buffer, buflen)) |
158 | return NF_DROP; | 183 | return NF_DROP; |
159 | } | 184 | } |
160 | 185 | ||
@@ -167,8 +192,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
167 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { | 192 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { |
168 | buflen = sprintf(buffer, "%pI4", | 193 | buflen = sprintf(buffer, "%pI4", |
169 | &ct->tuplehash[!dir].tuple.src.u3.ip); | 194 | &ct->tuplehash[!dir].tuple.src.u3.ip); |
170 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 195 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
171 | buffer, buflen)) | 196 | poff, plen, buffer, buflen)) |
172 | return NF_DROP; | 197 | return NF_DROP; |
173 | } | 198 | } |
174 | 199 | ||
@@ -181,31 +206,45 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
181 | htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { | 206 | htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { |
182 | __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; | 207 | __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; |
183 | buflen = sprintf(buffer, "%u", ntohs(p)); | 208 | buflen = sprintf(buffer, "%u", ntohs(p)); |
184 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 209 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
185 | buffer, buflen)) | 210 | poff, plen, buffer, buflen)) |
186 | return NF_DROP; | 211 | return NF_DROP; |
187 | } | 212 | } |
188 | } | 213 | } |
189 | 214 | ||
190 | next: | 215 | next: |
191 | /* Translate Contact headers */ | 216 | /* Translate Contact headers */ |
192 | dataoff = 0; | 217 | coff = 0; |
193 | in_header = 0; | 218 | in_header = 0; |
194 | while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, | 219 | while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, |
195 | SIP_HDR_CONTACT, &in_header, | 220 | SIP_HDR_CONTACT, &in_header, |
196 | &matchoff, &matchlen, | 221 | &matchoff, &matchlen, |
197 | &addr, &port) > 0) { | 222 | &addr, &port) > 0) { |
198 | if (!map_addr(skb, dptr, datalen, matchoff, matchlen, | 223 | if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
199 | &addr, port)) | 224 | &addr, port)) |
200 | return NF_DROP; | 225 | return NF_DROP; |
201 | } | 226 | } |
202 | 227 | ||
203 | if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || | 228 | if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || |
204 | !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) | 229 | !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) |
205 | return NF_DROP; | 230 | return NF_DROP; |
231 | |||
206 | return NF_ACCEPT; | 232 | return NF_ACCEPT; |
207 | } | 233 | } |
208 | 234 | ||
235 | static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) | ||
236 | { | ||
237 | enum ip_conntrack_info ctinfo; | ||
238 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
239 | const struct tcphdr *th; | ||
240 | |||
241 | if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) | ||
242 | return; | ||
243 | |||
244 | th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); | ||
245 | nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); | ||
246 | } | ||
247 | |||
209 | /* Handles expected signalling connections and media streams */ | 248 | /* Handles expected signalling connections and media streams */ |
210 | static void ip_nat_sip_expected(struct nf_conn *ct, | 249 | static void ip_nat_sip_expected(struct nf_conn *ct, |
211 | struct nf_conntrack_expect *exp) | 250 | struct nf_conntrack_expect *exp) |
@@ -232,7 +271,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct, | |||
232 | } | 271 | } |
233 | } | 272 | } |
234 | 273 | ||
235 | static unsigned int ip_nat_sip_expect(struct sk_buff *skb, | 274 | static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, |
236 | const char **dptr, unsigned int *datalen, | 275 | const char **dptr, unsigned int *datalen, |
237 | struct nf_conntrack_expect *exp, | 276 | struct nf_conntrack_expect *exp, |
238 | unsigned int matchoff, | 277 | unsigned int matchoff, |
@@ -279,8 +318,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, | |||
279 | if (exp->tuple.dst.u3.ip != exp->saved_ip || | 318 | if (exp->tuple.dst.u3.ip != exp->saved_ip || |
280 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { | 319 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { |
281 | buflen = sprintf(buffer, "%pI4:%u", &newip, port); | 320 | buflen = sprintf(buffer, "%pI4:%u", &newip, port); |
282 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 321 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
283 | buffer, buflen)) | 322 | matchoff, matchlen, buffer, buflen)) |
284 | goto err; | 323 | goto err; |
285 | } | 324 | } |
286 | return NF_ACCEPT; | 325 | return NF_ACCEPT; |
@@ -290,7 +329,7 @@ err: | |||
290 | return NF_DROP; | 329 | return NF_DROP; |
291 | } | 330 | } |
292 | 331 | ||
293 | static int mangle_content_len(struct sk_buff *skb, | 332 | static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, |
294 | const char **dptr, unsigned int *datalen) | 333 | const char **dptr, unsigned int *datalen) |
295 | { | 334 | { |
296 | enum ip_conntrack_info ctinfo; | 335 | enum ip_conntrack_info ctinfo; |
@@ -312,12 +351,13 @@ static int mangle_content_len(struct sk_buff *skb, | |||
312 | return 0; | 351 | return 0; |
313 | 352 | ||
314 | buflen = sprintf(buffer, "%u", c_len); | 353 | buflen = sprintf(buffer, "%u", c_len); |
315 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 354 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
316 | buffer, buflen); | 355 | buffer, buflen); |
317 | } | 356 | } |
318 | 357 | ||
319 | static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | 358 | static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, |
320 | unsigned int dataoff, unsigned int *datalen, | 359 | const char **dptr, unsigned int *datalen, |
360 | unsigned int sdpoff, | ||
321 | enum sdp_header_types type, | 361 | enum sdp_header_types type, |
322 | enum sdp_header_types term, | 362 | enum sdp_header_types term, |
323 | char *buffer, int buflen) | 363 | char *buffer, int buflen) |
@@ -326,16 +366,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | |||
326 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | 366 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
327 | unsigned int matchlen, matchoff; | 367 | unsigned int matchlen, matchoff; |
328 | 368 | ||
329 | if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, | 369 | if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, |
330 | &matchoff, &matchlen) <= 0) | 370 | &matchoff, &matchlen) <= 0) |
331 | return -ENOENT; | 371 | return -ENOENT; |
332 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 372 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
333 | buffer, buflen) ? 0 : -EINVAL; | 373 | buffer, buflen) ? 0 : -EINVAL; |
334 | } | 374 | } |
335 | 375 | ||
336 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | 376 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, |
337 | unsigned int dataoff, | 377 | const char **dptr, unsigned int *datalen, |
338 | unsigned int *datalen, | 378 | unsigned int sdpoff, |
339 | enum sdp_header_types type, | 379 | enum sdp_header_types type, |
340 | enum sdp_header_types term, | 380 | enum sdp_header_types term, |
341 | const union nf_inet_addr *addr) | 381 | const union nf_inet_addr *addr) |
@@ -344,16 +384,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | |||
344 | unsigned int buflen; | 384 | unsigned int buflen; |
345 | 385 | ||
346 | buflen = sprintf(buffer, "%pI4", &addr->ip); | 386 | buflen = sprintf(buffer, "%pI4", &addr->ip); |
347 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, | 387 | if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, |
348 | buffer, buflen)) | 388 | buffer, buflen)) |
349 | return 0; | 389 | return 0; |
350 | 390 | ||
351 | return mangle_content_len(skb, dptr, datalen); | 391 | return mangle_content_len(skb, dataoff, dptr, datalen); |
352 | } | 392 | } |
353 | 393 | ||
354 | static unsigned int ip_nat_sdp_port(struct sk_buff *skb, | 394 | static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, |
355 | const char **dptr, | 395 | const char **dptr, unsigned int *datalen, |
356 | unsigned int *datalen, | ||
357 | unsigned int matchoff, | 396 | unsigned int matchoff, |
358 | unsigned int matchlen, | 397 | unsigned int matchlen, |
359 | u_int16_t port) | 398 | u_int16_t port) |
@@ -362,16 +401,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, | |||
362 | unsigned int buflen; | 401 | unsigned int buflen; |
363 | 402 | ||
364 | buflen = sprintf(buffer, "%u", port); | 403 | buflen = sprintf(buffer, "%u", port); |
365 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 404 | if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
366 | buffer, buflen)) | 405 | buffer, buflen)) |
367 | return 0; | 406 | return 0; |
368 | 407 | ||
369 | return mangle_content_len(skb, dptr, datalen); | 408 | return mangle_content_len(skb, dataoff, dptr, datalen); |
370 | } | 409 | } |
371 | 410 | ||
372 | static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | 411 | static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, |
373 | unsigned int dataoff, | 412 | const char **dptr, unsigned int *datalen, |
374 | unsigned int *datalen, | 413 | unsigned int sdpoff, |
375 | const union nf_inet_addr *addr) | 414 | const union nf_inet_addr *addr) |
376 | { | 415 | { |
377 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | 416 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; |
@@ -379,12 +418,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | |||
379 | 418 | ||
380 | /* Mangle session description owner and contact addresses */ | 419 | /* Mangle session description owner and contact addresses */ |
381 | buflen = sprintf(buffer, "%pI4", &addr->ip); | 420 | buflen = sprintf(buffer, "%pI4", &addr->ip); |
382 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, | 421 | if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, |
383 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, | 422 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, |
384 | buffer, buflen)) | 423 | buffer, buflen)) |
385 | return 0; | 424 | return 0; |
386 | 425 | ||
387 | switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, | 426 | switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, |
388 | SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, | 427 | SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, |
389 | buffer, buflen)) { | 428 | buffer, buflen)) { |
390 | case 0: | 429 | case 0: |
@@ -401,14 +440,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | |||
401 | return 0; | 440 | return 0; |
402 | } | 441 | } |
403 | 442 | ||
404 | return mangle_content_len(skb, dptr, datalen); | 443 | return mangle_content_len(skb, dataoff, dptr, datalen); |
405 | } | 444 | } |
406 | 445 | ||
407 | /* So, this packet has hit the connection tracking matching code. | 446 | /* So, this packet has hit the connection tracking matching code. |
408 | Mangle it, and change the expectation to match the new version. */ | 447 | Mangle it, and change the expectation to match the new version. */ |
409 | static unsigned int ip_nat_sdp_media(struct sk_buff *skb, | 448 | static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, |
410 | const char **dptr, | 449 | const char **dptr, unsigned int *datalen, |
411 | unsigned int *datalen, | ||
412 | struct nf_conntrack_expect *rtp_exp, | 450 | struct nf_conntrack_expect *rtp_exp, |
413 | struct nf_conntrack_expect *rtcp_exp, | 451 | struct nf_conntrack_expect *rtcp_exp, |
414 | unsigned int mediaoff, | 452 | unsigned int mediaoff, |
@@ -456,7 +494,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, | |||
456 | 494 | ||
457 | /* Update media port. */ | 495 | /* Update media port. */ |
458 | if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && | 496 | if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && |
459 | !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) | 497 | !ip_nat_sdp_port(skb, dataoff, dptr, datalen, |
498 | mediaoff, medialen, port)) | ||
460 | goto err2; | 499 | goto err2; |
461 | 500 | ||
462 | return NF_ACCEPT; | 501 | return NF_ACCEPT; |
@@ -471,6 +510,7 @@ err1: | |||
471 | static void __exit nf_nat_sip_fini(void) | 510 | static void __exit nf_nat_sip_fini(void) |
472 | { | 511 | { |
473 | rcu_assign_pointer(nf_nat_sip_hook, NULL); | 512 | rcu_assign_pointer(nf_nat_sip_hook, NULL); |
513 | rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); | ||
474 | rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); | 514 | rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); |
475 | rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); | 515 | rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); |
476 | rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); | 516 | rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); |
@@ -482,12 +522,14 @@ static void __exit nf_nat_sip_fini(void) | |||
482 | static int __init nf_nat_sip_init(void) | 522 | static int __init nf_nat_sip_init(void) |
483 | { | 523 | { |
484 | BUG_ON(nf_nat_sip_hook != NULL); | 524 | BUG_ON(nf_nat_sip_hook != NULL); |
525 | BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); | ||
485 | BUG_ON(nf_nat_sip_expect_hook != NULL); | 526 | BUG_ON(nf_nat_sip_expect_hook != NULL); |
486 | BUG_ON(nf_nat_sdp_addr_hook != NULL); | 527 | BUG_ON(nf_nat_sdp_addr_hook != NULL); |
487 | BUG_ON(nf_nat_sdp_port_hook != NULL); | 528 | BUG_ON(nf_nat_sdp_port_hook != NULL); |
488 | BUG_ON(nf_nat_sdp_session_hook != NULL); | 529 | BUG_ON(nf_nat_sdp_session_hook != NULL); |
489 | BUG_ON(nf_nat_sdp_media_hook != NULL); | 530 | BUG_ON(nf_nat_sdp_media_hook != NULL); |
490 | rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); | 531 | rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); |
532 | rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); | ||
491 | rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); | 533 | rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); |
492 | rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); | 534 | rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); |
493 | rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); | 535 | rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index d9521f6f9ed0..4d85b6e55f29 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/moduleparam.h> | 43 | #include <linux/moduleparam.h> |
44 | #include <linux/types.h> | 44 | #include <linux/types.h> |
45 | #include <linux/kernel.h> | 45 | #include <linux/kernel.h> |
46 | #include <linux/slab.h> | ||
46 | #include <linux/in.h> | 47 | #include <linux/in.h> |
47 | #include <linux/ip.h> | 48 | #include <linux/ip.h> |
48 | #include <linux/udp.h> | 49 | #include <linux/udp.h> |
@@ -1038,7 +1039,7 @@ static int snmp_parse_mangle(unsigned char *msg, | |||
1038 | unsigned int cls, con, tag, vers, pdutype; | 1039 | unsigned int cls, con, tag, vers, pdutype; |
1039 | struct asn1_ctx ctx; | 1040 | struct asn1_ctx ctx; |
1040 | struct asn1_octstr comm; | 1041 | struct asn1_octstr comm; |
1041 | struct snmp_object **obj; | 1042 | struct snmp_object *obj; |
1042 | 1043 | ||
1043 | if (debug > 1) | 1044 | if (debug > 1) |
1044 | hex_dump(msg, len); | 1045 | hex_dump(msg, len); |
@@ -1148,43 +1149,34 @@ static int snmp_parse_mangle(unsigned char *msg, | |||
1148 | if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) | 1149 | if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) |
1149 | return 0; | 1150 | return 0; |
1150 | 1151 | ||
1151 | obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); | ||
1152 | if (obj == NULL) { | ||
1153 | if (net_ratelimit()) | ||
1154 | printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__); | ||
1155 | return 0; | ||
1156 | } | ||
1157 | |||
1158 | while (!asn1_eoc_decode(&ctx, eoc)) { | 1152 | while (!asn1_eoc_decode(&ctx, eoc)) { |
1159 | unsigned int i; | 1153 | unsigned int i; |
1160 | 1154 | ||
1161 | if (!snmp_object_decode(&ctx, obj)) { | 1155 | if (!snmp_object_decode(&ctx, &obj)) { |
1162 | if (*obj) { | 1156 | if (obj) { |
1163 | kfree((*obj)->id); | 1157 | kfree(obj->id); |
1164 | kfree(*obj); | 1158 | kfree(obj); |
1165 | } | 1159 | } |
1166 | kfree(obj); | ||
1167 | return 0; | 1160 | return 0; |
1168 | } | 1161 | } |
1169 | 1162 | ||
1170 | if (debug > 1) { | 1163 | if (debug > 1) { |
1171 | printk(KERN_DEBUG "bsalg: object: "); | 1164 | printk(KERN_DEBUG "bsalg: object: "); |
1172 | for (i = 0; i < (*obj)->id_len; i++) { | 1165 | for (i = 0; i < obj->id_len; i++) { |
1173 | if (i > 0) | 1166 | if (i > 0) |
1174 | printk("."); | 1167 | printk("."); |
1175 | printk("%lu", (*obj)->id[i]); | 1168 | printk("%lu", obj->id[i]); |
1176 | } | 1169 | } |
1177 | printk(": type=%u\n", (*obj)->type); | 1170 | printk(": type=%u\n", obj->type); |
1178 | 1171 | ||
1179 | } | 1172 | } |
1180 | 1173 | ||
1181 | if ((*obj)->type == SNMP_IPADDR) | 1174 | if (obj->type == SNMP_IPADDR) |
1182 | mangle_address(ctx.begin, ctx.pointer - 4 , map, check); | 1175 | mangle_address(ctx.begin, ctx.pointer - 4 , map, check); |
1183 | 1176 | ||
1184 | kfree((*obj)->id); | 1177 | kfree(obj->id); |
1185 | kfree(*obj); | 1178 | kfree(obj); |
1186 | } | 1179 | } |
1187 | kfree(obj); | ||
1188 | 1180 | ||
1189 | if (!asn1_eoc_decode(&ctx, eoc)) | 1181 | if (!asn1_eoc_decode(&ctx, eoc)) |
1190 | return 0; | 1182 | return 0; |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5f41d017ddd8..c39c9cf6bee6 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | #include <linux/types.h> | 8 | #include <linux/types.h> |
9 | #include <linux/icmp.h> | 9 | #include <linux/icmp.h> |
10 | #include <linux/gfp.h> | ||
10 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
11 | #include <linux/netfilter.h> | 12 | #include <linux/netfilter.h> |
12 | #include <linux/netfilter_ipv4.h> | 13 | #include <linux/netfilter_ipv4.h> |
@@ -197,11 +198,11 @@ nf_nat_out(unsigned int hooknum, | |||
197 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 198 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
198 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 199 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
199 | 200 | ||
200 | if (ct->tuplehash[dir].tuple.src.u3.ip != | 201 | if ((ct->tuplehash[dir].tuple.src.u3.ip != |
201 | ct->tuplehash[!dir].tuple.dst.u3.ip | 202 | ct->tuplehash[!dir].tuple.dst.u3.ip) || |
202 | || ct->tuplehash[dir].tuple.src.u.all != | 203 | (ct->tuplehash[dir].tuple.src.u.all != |
203 | ct->tuplehash[!dir].tuple.dst.u.all | 204 | ct->tuplehash[!dir].tuple.dst.u.all) |
204 | ) | 205 | ) |
205 | return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; | 206 | return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; |
206 | } | 207 | } |
207 | #endif | 208 | #endif |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f25542c48b7d..4f1f337f4337 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { | |||
127 | SNMP_MIB_SENTINEL | 127 | SNMP_MIB_SENTINEL |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static struct { | 130 | static const struct { |
131 | char *name; | 131 | const char *name; |
132 | int index; | 132 | int index; |
133 | } icmpmibmap[] = { | 133 | } icmpmibmap[] = { |
134 | { "DestUnreachs", ICMP_DEST_UNREACH }, | 134 | { "DestUnreachs", ICMP_DEST_UNREACH }, |
@@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
249 | SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), | 249 | SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), |
250 | SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), | 250 | SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), |
251 | SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), | 251 | SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), |
252 | SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), | ||
253 | SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), | ||
252 | SNMP_MIB_SENTINEL | 254 | SNMP_MIB_SENTINEL |
253 | }; | 255 | }; |
254 | 256 | ||
@@ -280,7 +282,7 @@ static void icmpmsg_put(struct seq_file *seq) | |||
280 | 282 | ||
281 | count = 0; | 283 | count = 0; |
282 | for (i = 0; i < ICMPMSG_MIB_MAX; i++) { | 284 | for (i = 0; i < ICMPMSG_MIB_MAX; i++) { |
283 | val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); | 285 | val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); |
284 | if (val) { | 286 | if (val) { |
285 | type[count] = i; | 287 | type[count] = i; |
286 | vals[count++] = val; | 288 | vals[count++] = val; |
@@ -307,18 +309,18 @@ static void icmp_put(struct seq_file *seq) | |||
307 | for (i=0; icmpmibmap[i].name != NULL; i++) | 309 | for (i=0; icmpmibmap[i].name != NULL; i++) |
308 | seq_printf(seq, " Out%s", icmpmibmap[i].name); | 310 | seq_printf(seq, " Out%s", icmpmibmap[i].name); |
309 | seq_printf(seq, "\nIcmp: %lu %lu", | 311 | seq_printf(seq, "\nIcmp: %lu %lu", |
310 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), | 312 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), |
311 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); | 313 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); |
312 | for (i=0; icmpmibmap[i].name != NULL; i++) | 314 | for (i=0; icmpmibmap[i].name != NULL; i++) |
313 | seq_printf(seq, " %lu", | 315 | seq_printf(seq, " %lu", |
314 | snmp_fold_field((void **) net->mib.icmpmsg_statistics, | 316 | snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, |
315 | icmpmibmap[i].index)); | 317 | icmpmibmap[i].index)); |
316 | seq_printf(seq, " %lu %lu", | 318 | seq_printf(seq, " %lu %lu", |
317 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), | 319 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), |
318 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); | 320 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); |
319 | for (i=0; icmpmibmap[i].name != NULL; i++) | 321 | for (i=0; icmpmibmap[i].name != NULL; i++) |
320 | seq_printf(seq, " %lu", | 322 | seq_printf(seq, " %lu", |
321 | snmp_fold_field((void **) net->mib.icmpmsg_statistics, | 323 | snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, |
322 | icmpmibmap[i].index | 0x100)); | 324 | icmpmibmap[i].index | 0x100)); |
323 | } | 325 | } |
324 | 326 | ||
@@ -341,7 +343,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
341 | 343 | ||
342 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | 344 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) |
343 | seq_printf(seq, " %lu", | 345 | seq_printf(seq, " %lu", |
344 | snmp_fold_field((void **)net->mib.ip_statistics, | 346 | snmp_fold_field((void __percpu **)net->mib.ip_statistics, |
345 | snmp4_ipstats_list[i].entry)); | 347 | snmp4_ipstats_list[i].entry)); |
346 | 348 | ||
347 | icmp_put(seq); /* RFC 2011 compatibility */ | 349 | icmp_put(seq); /* RFC 2011 compatibility */ |
@@ -356,11 +358,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
356 | /* MaxConn field is signed, RFC 2012 */ | 358 | /* MaxConn field is signed, RFC 2012 */ |
357 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) | 359 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) |
358 | seq_printf(seq, " %ld", | 360 | seq_printf(seq, " %ld", |
359 | snmp_fold_field((void **)net->mib.tcp_statistics, | 361 | snmp_fold_field((void __percpu **)net->mib.tcp_statistics, |
360 | snmp4_tcp_list[i].entry)); | 362 | snmp4_tcp_list[i].entry)); |
361 | else | 363 | else |
362 | seq_printf(seq, " %lu", | 364 | seq_printf(seq, " %lu", |
363 | snmp_fold_field((void **)net->mib.tcp_statistics, | 365 | snmp_fold_field((void __percpu **)net->mib.tcp_statistics, |
364 | snmp4_tcp_list[i].entry)); | 366 | snmp4_tcp_list[i].entry)); |
365 | } | 367 | } |
366 | 368 | ||
@@ -371,7 +373,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
371 | seq_puts(seq, "\nUdp:"); | 373 | seq_puts(seq, "\nUdp:"); |
372 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 374 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
373 | seq_printf(seq, " %lu", | 375 | seq_printf(seq, " %lu", |
374 | snmp_fold_field((void **)net->mib.udp_statistics, | 376 | snmp_fold_field((void __percpu **)net->mib.udp_statistics, |
375 | snmp4_udp_list[i].entry)); | 377 | snmp4_udp_list[i].entry)); |
376 | 378 | ||
377 | /* the UDP and UDP-Lite MIBs are the same */ | 379 | /* the UDP and UDP-Lite MIBs are the same */ |
@@ -382,7 +384,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
382 | seq_puts(seq, "\nUdpLite:"); | 384 | seq_puts(seq, "\nUdpLite:"); |
383 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 385 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
384 | seq_printf(seq, " %lu", | 386 | seq_printf(seq, " %lu", |
385 | snmp_fold_field((void **)net->mib.udplite_statistics, | 387 | snmp_fold_field((void __percpu **)net->mib.udplite_statistics, |
386 | snmp4_udp_list[i].entry)); | 388 | snmp4_udp_list[i].entry)); |
387 | 389 | ||
388 | seq_putc(seq, '\n'); | 390 | seq_putc(seq, '\n'); |
@@ -419,7 +421,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
419 | seq_puts(seq, "\nTcpExt:"); | 421 | seq_puts(seq, "\nTcpExt:"); |
420 | for (i = 0; snmp4_net_list[i].name != NULL; i++) | 422 | for (i = 0; snmp4_net_list[i].name != NULL; i++) |
421 | seq_printf(seq, " %lu", | 423 | seq_printf(seq, " %lu", |
422 | snmp_fold_field((void **)net->mib.net_statistics, | 424 | snmp_fold_field((void __percpu **)net->mib.net_statistics, |
423 | snmp4_net_list[i].entry)); | 425 | snmp4_net_list[i].entry)); |
424 | 426 | ||
425 | seq_puts(seq, "\nIpExt:"); | 427 | seq_puts(seq, "\nIpExt:"); |
@@ -429,7 +431,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
429 | seq_puts(seq, "\nIpExt:"); | 431 | seq_puts(seq, "\nIpExt:"); |
430 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) | 432 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) |
431 | seq_printf(seq, " %lu", | 433 | seq_printf(seq, " %lu", |
432 | snmp_fold_field((void **)net->mib.ip_statistics, | 434 | snmp_fold_field((void __percpu **)net->mib.ip_statistics, |
433 | snmp4_ipextstats_list[i].entry)); | 435 | snmp4_ipextstats_list[i].entry)); |
434 | 436 | ||
435 | seq_putc(seq, '\n'); | 437 | seq_putc(seq, '\n'); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ab996f9c0fe0..cc6f097fbd5f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -60,7 +60,6 @@ | |||
60 | #include <net/net_namespace.h> | 60 | #include <net/net_namespace.h> |
61 | #include <net/dst.h> | 61 | #include <net/dst.h> |
62 | #include <net/sock.h> | 62 | #include <net/sock.h> |
63 | #include <linux/gfp.h> | ||
64 | #include <linux/ip.h> | 63 | #include <linux/ip.h> |
65 | #include <linux/net.h> | 64 | #include <linux/net.h> |
66 | #include <net/ip.h> | 65 | #include <net/ip.h> |
@@ -87,7 +86,7 @@ void raw_hash_sk(struct sock *sk) | |||
87 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; | 86 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; |
88 | struct hlist_head *head; | 87 | struct hlist_head *head; |
89 | 88 | ||
90 | head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; | 89 | head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; |
91 | 90 | ||
92 | write_lock_bh(&h->lock); | 91 | write_lock_bh(&h->lock); |
93 | sk_add_node(sk, head); | 92 | sk_add_node(sk, head); |
@@ -115,9 +114,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, | |||
115 | sk_for_each_from(sk, node) { | 114 | sk_for_each_from(sk, node) { |
116 | struct inet_sock *inet = inet_sk(sk); | 115 | struct inet_sock *inet = inet_sk(sk); |
117 | 116 | ||
118 | if (net_eq(sock_net(sk), net) && inet->num == num && | 117 | if (net_eq(sock_net(sk), net) && inet->inet_num == num && |
119 | !(inet->daddr && inet->daddr != raddr) && | 118 | !(inet->inet_daddr && inet->inet_daddr != raddr) && |
120 | !(inet->rcv_saddr && inet->rcv_saddr != laddr) && | 119 | !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && |
121 | !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) | 120 | !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) |
122 | goto found; /* gotcha */ | 121 | goto found; /* gotcha */ |
123 | } | 122 | } |
@@ -292,7 +291,6 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) | |||
292 | /* Charge it to the socket. */ | 291 | /* Charge it to the socket. */ |
293 | 292 | ||
294 | if (sock_queue_rcv_skb(sk, skb) < 0) { | 293 | if (sock_queue_rcv_skb(sk, skb) < 0) { |
295 | atomic_inc(&sk->sk_drops); | ||
296 | kfree_skb(skb); | 294 | kfree_skb(skb); |
297 | return NET_RX_DROP; | 295 | return NET_RX_DROP; |
298 | } | 296 | } |
@@ -327,7 +325,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
327 | int err; | 325 | int err; |
328 | 326 | ||
329 | if (length > rt->u.dst.dev->mtu) { | 327 | if (length > rt->u.dst.dev->mtu) { |
330 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, | 328 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
331 | rt->u.dst.dev->mtu); | 329 | rt->u.dst.dev->mtu); |
332 | return -EMSGSIZE; | 330 | return -EMSGSIZE; |
333 | } | 331 | } |
@@ -500,10 +498,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
500 | err = -EDESTADDRREQ; | 498 | err = -EDESTADDRREQ; |
501 | if (sk->sk_state != TCP_ESTABLISHED) | 499 | if (sk->sk_state != TCP_ESTABLISHED) |
502 | goto out; | 500 | goto out; |
503 | daddr = inet->daddr; | 501 | daddr = inet->inet_daddr; |
504 | } | 502 | } |
505 | 503 | ||
506 | ipc.addr = inet->saddr; | 504 | ipc.addr = inet->inet_saddr; |
507 | ipc.opt = NULL; | 505 | ipc.opt = NULL; |
508 | ipc.shtx.flags = 0; | 506 | ipc.shtx.flags = 0; |
509 | ipc.oif = sk->sk_bound_dev_if; | 507 | ipc.oif = sk->sk_bound_dev_if; |
@@ -645,9 +643,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
645 | if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && | 643 | if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && |
646 | chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) | 644 | chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) |
647 | goto out; | 645 | goto out; |
648 | inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; | 646 | inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; |
649 | if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) | 647 | if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) |
650 | inet->saddr = 0; /* Use device */ | 648 | inet->inet_saddr = 0; /* Use device */ |
651 | sk_dst_reset(sk); | 649 | sk_dst_reset(sk); |
652 | ret = 0; | 650 | ret = 0; |
653 | out: return ret; | 651 | out: return ret; |
@@ -692,7 +690,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
692 | if (err) | 690 | if (err) |
693 | goto done; | 691 | goto done; |
694 | 692 | ||
695 | sock_recv_timestamp(msg, sk, skb); | 693 | sock_recv_ts_and_drops(msg, sk, skb); |
696 | 694 | ||
697 | /* Copy the address. */ | 695 | /* Copy the address. */ |
698 | if (sin) { | 696 | if (sin) { |
@@ -717,7 +715,7 @@ static int raw_init(struct sock *sk) | |||
717 | { | 715 | { |
718 | struct raw_sock *rp = raw_sk(sk); | 716 | struct raw_sock *rp = raw_sk(sk); |
719 | 717 | ||
720 | if (inet_sk(sk)->num == IPPROTO_ICMP) | 718 | if (inet_sk(sk)->inet_num == IPPROTO_ICMP) |
721 | memset(&rp->filter, 0, sizeof(rp->filter)); | 719 | memset(&rp->filter, 0, sizeof(rp->filter)); |
722 | return 0; | 720 | return 0; |
723 | } | 721 | } |
@@ -754,7 +752,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname, | |||
754 | char __user *optval, unsigned int optlen) | 752 | char __user *optval, unsigned int optlen) |
755 | { | 753 | { |
756 | if (optname == ICMP_FILTER) { | 754 | if (optname == ICMP_FILTER) { |
757 | if (inet_sk(sk)->num != IPPROTO_ICMP) | 755 | if (inet_sk(sk)->inet_num != IPPROTO_ICMP) |
758 | return -EOPNOTSUPP; | 756 | return -EOPNOTSUPP; |
759 | else | 757 | else |
760 | return raw_seticmpfilter(sk, optval, optlen); | 758 | return raw_seticmpfilter(sk, optval, optlen); |
@@ -784,7 +782,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname, | |||
784 | char __user *optval, int __user *optlen) | 782 | char __user *optval, int __user *optlen) |
785 | { | 783 | { |
786 | if (optname == ICMP_FILTER) { | 784 | if (optname == ICMP_FILTER) { |
787 | if (inet_sk(sk)->num != IPPROTO_ICMP) | 785 | if (inet_sk(sk)->inet_num != IPPROTO_ICMP) |
788 | return -EOPNOTSUPP; | 786 | return -EOPNOTSUPP; |
789 | else | 787 | else |
790 | return raw_geticmpfilter(sk, optval, optlen); | 788 | return raw_geticmpfilter(sk, optval, optlen); |
@@ -943,10 +941,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop); | |||
943 | static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) | 941 | static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) |
944 | { | 942 | { |
945 | struct inet_sock *inet = inet_sk(sp); | 943 | struct inet_sock *inet = inet_sk(sp); |
946 | __be32 dest = inet->daddr, | 944 | __be32 dest = inet->inet_daddr, |
947 | src = inet->rcv_saddr; | 945 | src = inet->inet_rcv_saddr; |
948 | __u16 destp = 0, | 946 | __u16 destp = 0, |
949 | srcp = inet->num; | 947 | srcp = inet->inet_num; |
950 | 948 | ||
951 | seq_printf(seq, "%4d: %08X:%04X %08X:%04X" | 949 | seq_printf(seq, "%4d: %08X:%04X %08X:%04X" |
952 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", | 950 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b1050a5d874..cb562fdd9b9a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -90,6 +90,7 @@ | |||
90 | #include <linux/jhash.h> | 90 | #include <linux/jhash.h> |
91 | #include <linux/rcupdate.h> | 91 | #include <linux/rcupdate.h> |
92 | #include <linux/times.h> | 92 | #include <linux/times.h> |
93 | #include <linux/slab.h> | ||
93 | #include <net/dst.h> | 94 | #include <net/dst.h> |
94 | #include <net/net_namespace.h> | 95 | #include <net/net_namespace.h> |
95 | #include <net/protocol.h> | 96 | #include <net/protocol.h> |
@@ -146,7 +147,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | |||
146 | static void ipv4_link_failure(struct sk_buff *skb); | 147 | static void ipv4_link_failure(struct sk_buff *skb); |
147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 148 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
148 | static int rt_garbage_collect(struct dst_ops *ops); | 149 | static int rt_garbage_collect(struct dst_ops *ops); |
149 | static void rt_emergency_hash_rebuild(struct net *net); | ||
150 | 150 | ||
151 | 151 | ||
152 | static struct dst_ops ipv4_dst_ops = { | 152 | static struct dst_ops ipv4_dst_ops = { |
@@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
287 | if (!rt_hash_table[st->bucket].chain) | 287 | if (!rt_hash_table[st->bucket].chain) |
288 | continue; | 288 | continue; |
289 | rcu_read_lock_bh(); | 289 | rcu_read_lock_bh(); |
290 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 290 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); |
291 | while (r) { | 291 | while (r) { |
292 | if (dev_net(r->u.dst.dev) == seq_file_net(seq) && | 292 | if (dev_net(r->u.dst.dev) == seq_file_net(seq) && |
293 | r->rt_genid == st->genid) | 293 | r->rt_genid == st->genid) |
294 | return r; | 294 | return r; |
295 | r = rcu_dereference(r->u.dst.rt_next); | 295 | r = rcu_dereference_bh(r->u.dst.rt_next); |
296 | } | 296 | } |
297 | rcu_read_unlock_bh(); | 297 | rcu_read_unlock_bh(); |
298 | } | 298 | } |
@@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
314 | rcu_read_lock_bh(); | 314 | rcu_read_lock_bh(); |
315 | r = rt_hash_table[st->bucket].chain; | 315 | r = rt_hash_table[st->bucket].chain; |
316 | } | 316 | } |
317 | return rcu_dereference(r); | 317 | return rcu_dereference_bh(r); |
318 | } | 318 | } |
319 | 319 | ||
320 | static struct rtable *rt_cache_get_next(struct seq_file *seq, | 320 | static struct rtable *rt_cache_get_next(struct seq_file *seq, |
@@ -513,43 +513,42 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
513 | }; | 513 | }; |
514 | 514 | ||
515 | #ifdef CONFIG_NET_CLS_ROUTE | 515 | #ifdef CONFIG_NET_CLS_ROUTE |
516 | static int ip_rt_acct_read(char *buffer, char **start, off_t offset, | 516 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
517 | int length, int *eof, void *data) | 517 | { |
518 | { | 518 | struct ip_rt_acct *dst, *src; |
519 | unsigned int i; | 519 | unsigned int i, j; |
520 | 520 | ||
521 | if ((offset & 3) || (length & 3)) | 521 | dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); |
522 | return -EIO; | 522 | if (!dst) |
523 | 523 | return -ENOMEM; | |
524 | if (offset >= sizeof(struct ip_rt_acct) * 256) { | 524 | |
525 | *eof = 1; | 525 | for_each_possible_cpu(i) { |
526 | return 0; | 526 | src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); |
527 | } | 527 | for (j = 0; j < 256; j++) { |
528 | 528 | dst[j].o_bytes += src[j].o_bytes; | |
529 | if (offset + length >= sizeof(struct ip_rt_acct) * 256) { | 529 | dst[j].o_packets += src[j].o_packets; |
530 | length = sizeof(struct ip_rt_acct) * 256 - offset; | 530 | dst[j].i_bytes += src[j].i_bytes; |
531 | *eof = 1; | 531 | dst[j].i_packets += src[j].i_packets; |
532 | } | ||
532 | } | 533 | } |
533 | 534 | ||
534 | offset /= sizeof(u32); | 535 | seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); |
535 | 536 | kfree(dst); | |
536 | if (length > 0) { | 537 | return 0; |
537 | u32 *dst = (u32 *) buffer; | 538 | } |
538 | |||
539 | *start = buffer; | ||
540 | memset(dst, 0, length); | ||
541 | |||
542 | for_each_possible_cpu(i) { | ||
543 | unsigned int j; | ||
544 | u32 *src; | ||
545 | 539 | ||
546 | src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset; | 540 | static int rt_acct_proc_open(struct inode *inode, struct file *file) |
547 | for (j = 0; j < length/4; j++) | 541 | { |
548 | dst[j] += src[j]; | 542 | return single_open(file, rt_acct_proc_show, NULL); |
549 | } | ||
550 | } | ||
551 | return length; | ||
552 | } | 543 | } |
544 | |||
545 | static const struct file_operations rt_acct_proc_fops = { | ||
546 | .owner = THIS_MODULE, | ||
547 | .open = rt_acct_proc_open, | ||
548 | .read = seq_read, | ||
549 | .llseek = seq_lseek, | ||
550 | .release = single_release, | ||
551 | }; | ||
553 | #endif | 552 | #endif |
554 | 553 | ||
555 | static int __net_init ip_rt_do_proc_init(struct net *net) | 554 | static int __net_init ip_rt_do_proc_init(struct net *net) |
@@ -567,8 +566,7 @@ static int __net_init ip_rt_do_proc_init(struct net *net) | |||
567 | goto err2; | 566 | goto err2; |
568 | 567 | ||
569 | #ifdef CONFIG_NET_CLS_ROUTE | 568 | #ifdef CONFIG_NET_CLS_ROUTE |
570 | pde = create_proc_read_entry("rt_acct", 0, net->proc_net, | 569 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); |
571 | ip_rt_acct_read, NULL); | ||
572 | if (!pde) | 570 | if (!pde) |
573 | goto err3; | 571 | goto err3; |
574 | #endif | 572 | #endif |
@@ -588,7 +586,9 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) | |||
588 | { | 586 | { |
589 | remove_proc_entry("rt_cache", net->proc_net_stat); | 587 | remove_proc_entry("rt_cache", net->proc_net_stat); |
590 | remove_proc_entry("rt_cache", net->proc_net); | 588 | remove_proc_entry("rt_cache", net->proc_net); |
589 | #ifdef CONFIG_NET_CLS_ROUTE | ||
591 | remove_proc_entry("rt_acct", net->proc_net); | 590 | remove_proc_entry("rt_acct", net->proc_net); |
591 | #endif | ||
592 | } | 592 | } |
593 | 593 | ||
594 | static struct pernet_operations ip_rt_proc_ops __net_initdata = { | 594 | static struct pernet_operations ip_rt_proc_ops __net_initdata = { |
@@ -703,7 +703,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | |||
703 | 703 | ||
704 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 704 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
705 | { | 705 | { |
706 | return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); | 706 | return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); |
707 | } | 707 | } |
708 | 708 | ||
709 | static inline int rt_is_expired(struct rtable *rth) | 709 | static inline int rt_is_expired(struct rtable *rth) |
@@ -780,11 +780,30 @@ static void rt_do_flush(int process_context) | |||
780 | #define FRACT_BITS 3 | 780 | #define FRACT_BITS 3 |
781 | #define ONE (1UL << FRACT_BITS) | 781 | #define ONE (1UL << FRACT_BITS) |
782 | 782 | ||
783 | /* | ||
784 | * Given a hash chain and an item in this hash chain, | ||
785 | * find if a previous entry has the same hash_inputs | ||
786 | * (but differs on tos, mark or oif) | ||
787 | * Returns 0 if an alias is found. | ||
788 | * Returns ONE if rth has no alias before itself. | ||
789 | */ | ||
790 | static int has_noalias(const struct rtable *head, const struct rtable *rth) | ||
791 | { | ||
792 | const struct rtable *aux = head; | ||
793 | |||
794 | while (aux != rth) { | ||
795 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
796 | return 0; | ||
797 | aux = aux->u.dst.rt_next; | ||
798 | } | ||
799 | return ONE; | ||
800 | } | ||
801 | |||
783 | static void rt_check_expire(void) | 802 | static void rt_check_expire(void) |
784 | { | 803 | { |
785 | static unsigned int rover; | 804 | static unsigned int rover; |
786 | unsigned int i = rover, goal; | 805 | unsigned int i = rover, goal; |
787 | struct rtable *rth, *aux, **rthp; | 806 | struct rtable *rth, **rthp; |
788 | unsigned long samples = 0; | 807 | unsigned long samples = 0; |
789 | unsigned long sum = 0, sum2 = 0; | 808 | unsigned long sum = 0, sum2 = 0; |
790 | unsigned long delta; | 809 | unsigned long delta; |
@@ -835,15 +854,7 @@ nofree: | |||
835 | * attributes don't unfairly skew | 854 | * attributes don't unfairly skew |
836 | * the length computation | 855 | * the length computation |
837 | */ | 856 | */ |
838 | for (aux = rt_hash_table[i].chain;;) { | 857 | length += has_noalias(rt_hash_table[i].chain, rth); |
839 | if (aux == rth) { | ||
840 | length += ONE; | ||
841 | break; | ||
842 | } | ||
843 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
844 | break; | ||
845 | aux = aux->u.dst.rt_next; | ||
846 | } | ||
847 | continue; | 858 | continue; |
848 | } | 859 | } |
849 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | 860 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) |
@@ -902,6 +913,12 @@ void rt_cache_flush(struct net *net, int delay) | |||
902 | rt_do_flush(!in_softirq()); | 913 | rt_do_flush(!in_softirq()); |
903 | } | 914 | } |
904 | 915 | ||
916 | /* Flush previous cache invalidated entries from the cache */ | ||
917 | void rt_cache_flush_batch(void) | ||
918 | { | ||
919 | rt_do_flush(!in_softirq()); | ||
920 | } | ||
921 | |||
905 | /* | 922 | /* |
906 | * We change rt_genid and let gc do the cleanup | 923 | * We change rt_genid and let gc do the cleanup |
907 | */ | 924 | */ |
@@ -916,10 +933,8 @@ static void rt_secret_rebuild_oneshot(struct net *net) | |||
916 | { | 933 | { |
917 | del_timer_sync(&net->ipv4.rt_secret_timer); | 934 | del_timer_sync(&net->ipv4.rt_secret_timer); |
918 | rt_cache_invalidate(net); | 935 | rt_cache_invalidate(net); |
919 | if (ip_rt_secret_interval) { | 936 | if (ip_rt_secret_interval) |
920 | net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; | 937 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); |
921 | add_timer(&net->ipv4.rt_secret_timer); | ||
922 | } | ||
923 | } | 938 | } |
924 | 939 | ||
925 | static void rt_emergency_hash_rebuild(struct net *net) | 940 | static void rt_emergency_hash_rebuild(struct net *net) |
@@ -1067,8 +1082,23 @@ work_done: | |||
1067 | out: return 0; | 1082 | out: return 0; |
1068 | } | 1083 | } |
1069 | 1084 | ||
1085 | /* | ||
1086 | * Returns number of entries in a hash chain that have different hash_inputs | ||
1087 | */ | ||
1088 | static int slow_chain_length(const struct rtable *head) | ||
1089 | { | ||
1090 | int length = 0; | ||
1091 | const struct rtable *rth = head; | ||
1092 | |||
1093 | while (rth) { | ||
1094 | length += has_noalias(head, rth); | ||
1095 | rth = rth->u.dst.rt_next; | ||
1096 | } | ||
1097 | return length >> FRACT_BITS; | ||
1098 | } | ||
1099 | |||
1070 | static int rt_intern_hash(unsigned hash, struct rtable *rt, | 1100 | static int rt_intern_hash(unsigned hash, struct rtable *rt, |
1071 | struct rtable **rp, struct sk_buff *skb) | 1101 | struct rtable **rp, struct sk_buff *skb, int ifindex) |
1072 | { | 1102 | { |
1073 | struct rtable *rth, **rthp; | 1103 | struct rtable *rth, **rthp; |
1074 | unsigned long now; | 1104 | unsigned long now; |
@@ -1179,14 +1209,20 @@ restart: | |||
1179 | rt_free(cand); | 1209 | rt_free(cand); |
1180 | } | 1210 | } |
1181 | } else { | 1211 | } else { |
1182 | if (chain_length > rt_chain_length_max) { | 1212 | if (chain_length > rt_chain_length_max && |
1213 | slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { | ||
1183 | struct net *net = dev_net(rt->u.dst.dev); | 1214 | struct net *net = dev_net(rt->u.dst.dev); |
1184 | int num = ++net->ipv4.current_rt_cache_rebuild_count; | 1215 | int num = ++net->ipv4.current_rt_cache_rebuild_count; |
1185 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1216 | if (!rt_caching(net)) { |
1186 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", | 1217 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", |
1187 | rt->u.dst.dev->name, num); | 1218 | rt->u.dst.dev->name, num); |
1188 | } | 1219 | } |
1189 | rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); | 1220 | rt_emergency_hash_rebuild(net); |
1221 | spin_unlock_bh(rt_hash_lock_addr(hash)); | ||
1222 | |||
1223 | hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | ||
1224 | ifindex, rt_genid(net)); | ||
1225 | goto restart; | ||
1190 | } | 1226 | } |
1191 | } | 1227 | } |
1192 | 1228 | ||
@@ -1346,9 +1382,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1346 | return; | 1382 | return; |
1347 | 1383 | ||
1348 | net = dev_net(dev); | 1384 | net = dev_net(dev); |
1349 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) | 1385 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || |
1350 | || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) | 1386 | ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || |
1351 | || ipv4_is_zeronet(new_gw)) | 1387 | ipv4_is_zeronet(new_gw)) |
1352 | goto reject_redirect; | 1388 | goto reject_redirect; |
1353 | 1389 | ||
1354 | if (!rt_caching(net)) | 1390 | if (!rt_caching(net)) |
@@ -1411,7 +1447,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1411 | dev_hold(rt->u.dst.dev); | 1447 | dev_hold(rt->u.dst.dev); |
1412 | if (rt->idev) | 1448 | if (rt->idev) |
1413 | in_dev_hold(rt->idev); | 1449 | in_dev_hold(rt->idev); |
1414 | rt->u.dst.obsolete = 0; | 1450 | rt->u.dst.obsolete = -1; |
1415 | rt->u.dst.lastuse = jiffies; | 1451 | rt->u.dst.lastuse = jiffies; |
1416 | rt->u.dst.path = &rt->u.dst; | 1452 | rt->u.dst.path = &rt->u.dst; |
1417 | rt->u.dst.neighbour = NULL; | 1453 | rt->u.dst.neighbour = NULL; |
@@ -1447,7 +1483,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1447 | &netevent); | 1483 | &netevent); |
1448 | 1484 | ||
1449 | rt_del(hash, rth); | 1485 | rt_del(hash, rth); |
1450 | if (!rt_intern_hash(hash, rt, &rt, NULL)) | 1486 | if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) |
1451 | ip_rt_put(rt); | 1487 | ip_rt_put(rt); |
1452 | goto do_next; | 1488 | goto do_next; |
1453 | } | 1489 | } |
@@ -1476,11 +1512,12 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1476 | struct dst_entry *ret = dst; | 1512 | struct dst_entry *ret = dst; |
1477 | 1513 | ||
1478 | if (rt) { | 1514 | if (rt) { |
1479 | if (dst->obsolete) { | 1515 | if (dst->obsolete > 0) { |
1480 | ip_rt_put(rt); | 1516 | ip_rt_put(rt); |
1481 | ret = NULL; | 1517 | ret = NULL; |
1482 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1518 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || |
1483 | rt->u.dst.expires) { | 1519 | (rt->u.dst.expires && |
1520 | time_after_eq(jiffies, rt->u.dst.expires))) { | ||
1484 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1521 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, |
1485 | rt->fl.oif, | 1522 | rt->fl.oif, |
1486 | rt_genid(dev_net(dst->dev))); | 1523 | rt_genid(dev_net(dst->dev))); |
@@ -1628,9 +1665,6 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1628 | __be32 daddr = iph->daddr; | 1665 | __be32 daddr = iph->daddr; |
1629 | unsigned short est_mtu = 0; | 1666 | unsigned short est_mtu = 0; |
1630 | 1667 | ||
1631 | if (ipv4_config.no_pmtu_disc) | ||
1632 | return 0; | ||
1633 | |||
1634 | for (k = 0; k < 2; k++) { | 1668 | for (k = 0; k < 2; k++) { |
1635 | for (i = 0; i < 2; i++) { | 1669 | for (i = 0; i < 2; i++) { |
1636 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1670 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], |
@@ -1699,7 +1733,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1699 | 1733 | ||
1700 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1734 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
1701 | { | 1735 | { |
1702 | return NULL; | 1736 | if (rt_is_expired((struct rtable *)dst)) |
1737 | return NULL; | ||
1738 | return dst; | ||
1703 | } | 1739 | } |
1704 | 1740 | ||
1705 | static void ipv4_dst_destroy(struct dst_entry *dst) | 1741 | static void ipv4_dst_destroy(struct dst_entry *dst) |
@@ -1861,7 +1897,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1861 | if (!rth) | 1897 | if (!rth) |
1862 | goto e_nobufs; | 1898 | goto e_nobufs; |
1863 | 1899 | ||
1864 | rth->u.dst.output= ip_rt_bug; | 1900 | rth->u.dst.output = ip_rt_bug; |
1901 | rth->u.dst.obsolete = -1; | ||
1865 | 1902 | ||
1866 | atomic_set(&rth->u.dst.__refcnt, 1); | 1903 | atomic_set(&rth->u.dst.__refcnt, 1); |
1867 | rth->u.dst.flags= DST_HOST; | 1904 | rth->u.dst.flags= DST_HOST; |
@@ -1900,7 +1937,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1900 | 1937 | ||
1901 | in_dev_put(in_dev); | 1938 | in_dev_put(in_dev); |
1902 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1939 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
1903 | return rt_intern_hash(hash, rth, NULL, skb); | 1940 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); |
1904 | 1941 | ||
1905 | e_nobufs: | 1942 | e_nobufs: |
1906 | in_dev_put(in_dev); | 1943 | in_dev_put(in_dev); |
@@ -1987,8 +2024,13 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1987 | if (skb->protocol != htons(ETH_P_IP)) { | 2024 | if (skb->protocol != htons(ETH_P_IP)) { |
1988 | /* Not IP (i.e. ARP). Do not create route, if it is | 2025 | /* Not IP (i.e. ARP). Do not create route, if it is |
1989 | * invalid for proxy arp. DNAT routes are always valid. | 2026 | * invalid for proxy arp. DNAT routes are always valid. |
2027 | * | ||
2028 | * Proxy arp feature have been extended to allow, ARP | ||
2029 | * replies back to the same interface, to support | ||
2030 | * Private VLAN switch technologies. See arp.c. | ||
1990 | */ | 2031 | */ |
1991 | if (out_dev == in_dev) { | 2032 | if (out_dev == in_dev && |
2033 | IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { | ||
1992 | err = -EINVAL; | 2034 | err = -EINVAL; |
1993 | goto cleanup; | 2035 | goto cleanup; |
1994 | } | 2036 | } |
@@ -2022,6 +2064,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2022 | rth->fl.oif = 0; | 2064 | rth->fl.oif = 0; |
2023 | rth->rt_spec_dst= spec_dst; | 2065 | rth->rt_spec_dst= spec_dst; |
2024 | 2066 | ||
2067 | rth->u.dst.obsolete = -1; | ||
2025 | rth->u.dst.input = ip_forward; | 2068 | rth->u.dst.input = ip_forward; |
2026 | rth->u.dst.output = ip_output; | 2069 | rth->u.dst.output = ip_output; |
2027 | rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); | 2070 | rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); |
@@ -2061,7 +2104,7 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2061 | /* put it into the cache */ | 2104 | /* put it into the cache */ |
2062 | hash = rt_hash(daddr, saddr, fl->iif, | 2105 | hash = rt_hash(daddr, saddr, fl->iif, |
2063 | rt_genid(dev_net(rth->u.dst.dev))); | 2106 | rt_genid(dev_net(rth->u.dst.dev))); |
2064 | return rt_intern_hash(hash, rth, NULL, skb); | 2107 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); |
2065 | } | 2108 | } |
2066 | 2109 | ||
2067 | /* | 2110 | /* |
@@ -2186,6 +2229,7 @@ local_input: | |||
2186 | goto e_nobufs; | 2229 | goto e_nobufs; |
2187 | 2230 | ||
2188 | rth->u.dst.output= ip_rt_bug; | 2231 | rth->u.dst.output= ip_rt_bug; |
2232 | rth->u.dst.obsolete = -1; | ||
2189 | rth->rt_genid = rt_genid(net); | 2233 | rth->rt_genid = rt_genid(net); |
2190 | 2234 | ||
2191 | atomic_set(&rth->u.dst.__refcnt, 1); | 2235 | atomic_set(&rth->u.dst.__refcnt, 1); |
@@ -2217,7 +2261,7 @@ local_input: | |||
2217 | } | 2261 | } |
2218 | rth->rt_type = res.type; | 2262 | rth->rt_type = res.type; |
2219 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2263 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); |
2220 | err = rt_intern_hash(hash, rth, NULL, skb); | 2264 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); |
2221 | goto done; | 2265 | goto done; |
2222 | 2266 | ||
2223 | no_route: | 2267 | no_route: |
@@ -2314,10 +2358,11 @@ skip_cache: | |||
2314 | ip_hdr(skb)->protocol); | 2358 | ip_hdr(skb)->protocol); |
2315 | if (our | 2359 | if (our |
2316 | #ifdef CONFIG_IP_MROUTE | 2360 | #ifdef CONFIG_IP_MROUTE |
2317 | || (!ipv4_is_local_multicast(daddr) && | 2361 | || |
2318 | IN_DEV_MFORWARD(in_dev)) | 2362 | (!ipv4_is_local_multicast(daddr) && |
2363 | IN_DEV_MFORWARD(in_dev)) | ||
2319 | #endif | 2364 | #endif |
2320 | ) { | 2365 | ) { |
2321 | rcu_read_unlock(); | 2366 | rcu_read_unlock(); |
2322 | return ip_route_input_mc(skb, daddr, saddr, | 2367 | return ip_route_input_mc(skb, daddr, saddr, |
2323 | tos, dev, our); | 2368 | tos, dev, our); |
@@ -2411,6 +2456,7 @@ static int __mkroute_output(struct rtable **result, | |||
2411 | rth->rt_spec_dst= fl->fl4_src; | 2456 | rth->rt_spec_dst= fl->fl4_src; |
2412 | 2457 | ||
2413 | rth->u.dst.output=ip_output; | 2458 | rth->u.dst.output=ip_output; |
2459 | rth->u.dst.obsolete = -1; | ||
2414 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2460 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2415 | 2461 | ||
2416 | RT_CACHE_STAT_INC(out_slow_tot); | 2462 | RT_CACHE_STAT_INC(out_slow_tot); |
@@ -2462,7 +2508,7 @@ static int ip_mkroute_output(struct rtable **rp, | |||
2462 | if (err == 0) { | 2508 | if (err == 0) { |
2463 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | 2509 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, |
2464 | rt_genid(dev_net(dev_out))); | 2510 | rt_genid(dev_net(dev_out))); |
2465 | err = rt_intern_hash(hash, rth, rp, NULL); | 2511 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); |
2466 | } | 2512 | } |
2467 | 2513 | ||
2468 | return err; | 2514 | return err; |
@@ -2514,9 +2560,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2514 | of another iface. --ANK | 2560 | of another iface. --ANK |
2515 | */ | 2561 | */ |
2516 | 2562 | ||
2517 | if (oldflp->oif == 0 | 2563 | if (oldflp->oif == 0 && |
2518 | && (ipv4_is_multicast(oldflp->fl4_dst) || | 2564 | (ipv4_is_multicast(oldflp->fl4_dst) || |
2519 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { | 2565 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { |
2520 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2566 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2521 | dev_out = ip_dev_find(net, oldflp->fl4_src); | 2567 | dev_out = ip_dev_find(net, oldflp->fl4_src); |
2522 | if (dev_out == NULL) | 2568 | if (dev_out == NULL) |
@@ -2685,8 +2731,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2685 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2731 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); |
2686 | 2732 | ||
2687 | rcu_read_lock_bh(); | 2733 | rcu_read_lock_bh(); |
2688 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2734 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
2689 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2735 | rth = rcu_dereference_bh(rth->u.dst.rt_next)) { |
2690 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2736 | if (rth->fl.fl4_dst == flp->fl4_dst && |
2691 | rth->fl.fl4_src == flp->fl4_src && | 2737 | rth->fl.fl4_src == flp->fl4_src && |
2692 | rth->fl.iif == 0 && | 2738 | rth->fl.iif == 0 && |
@@ -2855,7 +2901,7 @@ static int rt_fill_info(struct net *net, | |||
2855 | error = rt->u.dst.error; | 2901 | error = rt->u.dst.error; |
2856 | expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; | 2902 | expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; |
2857 | if (rt->peer) { | 2903 | if (rt->peer) { |
2858 | id = rt->peer->ip_id_count; | 2904 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
2859 | if (rt->peer->tcp_ts_stamp) { | 2905 | if (rt->peer->tcp_ts_stamp) { |
2860 | ts = rt->peer->tcp_ts; | 2906 | ts = rt->peer->tcp_ts; |
2861 | tsage = get_seconds() - rt->peer->tcp_ts_stamp; | 2907 | tsage = get_seconds() - rt->peer->tcp_ts_stamp; |
@@ -3004,8 +3050,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
3004 | if (!rt_hash_table[h].chain) | 3050 | if (!rt_hash_table[h].chain) |
3005 | continue; | 3051 | continue; |
3006 | rcu_read_lock_bh(); | 3052 | rcu_read_lock_bh(); |
3007 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 3053 | for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; |
3008 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 3054 | rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { |
3009 | if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) | 3055 | if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) |
3010 | continue; | 3056 | continue; |
3011 | if (rt_is_expired(rt)) | 3057 | if (rt_is_expired(rt)) |
@@ -3056,23 +3102,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | |||
3056 | return -EINVAL; | 3102 | return -EINVAL; |
3057 | } | 3103 | } |
3058 | 3104 | ||
3059 | static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, | ||
3060 | void __user *oldval, | ||
3061 | size_t __user *oldlenp, | ||
3062 | void __user *newval, | ||
3063 | size_t newlen) | ||
3064 | { | ||
3065 | int delay; | ||
3066 | struct net *net; | ||
3067 | if (newlen != sizeof(int)) | ||
3068 | return -EINVAL; | ||
3069 | if (get_user(delay, (int __user *)newval)) | ||
3070 | return -EFAULT; | ||
3071 | net = (struct net *)table->extra1; | ||
3072 | rt_cache_flush(net, delay); | ||
3073 | return 0; | ||
3074 | } | ||
3075 | |||
3076 | static void rt_secret_reschedule(int old) | 3105 | static void rt_secret_reschedule(int old) |
3077 | { | 3106 | { |
3078 | struct net *net; | 3107 | struct net *net; |
@@ -3085,22 +3114,20 @@ static void rt_secret_reschedule(int old) | |||
3085 | rtnl_lock(); | 3114 | rtnl_lock(); |
3086 | for_each_net(net) { | 3115 | for_each_net(net) { |
3087 | int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); | 3116 | int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); |
3117 | long time; | ||
3088 | 3118 | ||
3089 | if (!new) | 3119 | if (!new) |
3090 | continue; | 3120 | continue; |
3091 | 3121 | ||
3092 | if (deleted) { | 3122 | if (deleted) { |
3093 | long time = net->ipv4.rt_secret_timer.expires - jiffies; | 3123 | time = net->ipv4.rt_secret_timer.expires - jiffies; |
3094 | 3124 | ||
3095 | if (time <= 0 || (time += diff) <= 0) | 3125 | if (time <= 0 || (time += diff) <= 0) |
3096 | time = 0; | 3126 | time = 0; |
3097 | |||
3098 | net->ipv4.rt_secret_timer.expires = time; | ||
3099 | } else | 3127 | } else |
3100 | net->ipv4.rt_secret_timer.expires = new; | 3128 | time = new; |
3101 | 3129 | ||
3102 | net->ipv4.rt_secret_timer.expires += jiffies; | 3130 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); |
3103 | add_timer(&net->ipv4.rt_secret_timer); | ||
3104 | } | 3131 | } |
3105 | rtnl_unlock(); | 3132 | rtnl_unlock(); |
3106 | } | 3133 | } |
@@ -3117,23 +3144,8 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, | |||
3117 | return ret; | 3144 | return ret; |
3118 | } | 3145 | } |
3119 | 3146 | ||
3120 | static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, | ||
3121 | void __user *oldval, | ||
3122 | size_t __user *oldlenp, | ||
3123 | void __user *newval, | ||
3124 | size_t newlen) | ||
3125 | { | ||
3126 | int old = ip_rt_secret_interval; | ||
3127 | int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); | ||
3128 | |||
3129 | rt_secret_reschedule(old); | ||
3130 | |||
3131 | return ret; | ||
3132 | } | ||
3133 | |||
3134 | static ctl_table ipv4_route_table[] = { | 3147 | static ctl_table ipv4_route_table[] = { |
3135 | { | 3148 | { |
3136 | .ctl_name = NET_IPV4_ROUTE_GC_THRESH, | ||
3137 | .procname = "gc_thresh", | 3149 | .procname = "gc_thresh", |
3138 | .data = &ipv4_dst_ops.gc_thresh, | 3150 | .data = &ipv4_dst_ops.gc_thresh, |
3139 | .maxlen = sizeof(int), | 3151 | .maxlen = sizeof(int), |
@@ -3141,7 +3153,6 @@ static ctl_table ipv4_route_table[] = { | |||
3141 | .proc_handler = proc_dointvec, | 3153 | .proc_handler = proc_dointvec, |
3142 | }, | 3154 | }, |
3143 | { | 3155 | { |
3144 | .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, | ||
3145 | .procname = "max_size", | 3156 | .procname = "max_size", |
3146 | .data = &ip_rt_max_size, | 3157 | .data = &ip_rt_max_size, |
3147 | .maxlen = sizeof(int), | 3158 | .maxlen = sizeof(int), |
@@ -3151,43 +3162,34 @@ static ctl_table ipv4_route_table[] = { | |||
3151 | { | 3162 | { |
3152 | /* Deprecated. Use gc_min_interval_ms */ | 3163 | /* Deprecated. Use gc_min_interval_ms */ |
3153 | 3164 | ||
3154 | .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL, | ||
3155 | .procname = "gc_min_interval", | 3165 | .procname = "gc_min_interval", |
3156 | .data = &ip_rt_gc_min_interval, | 3166 | .data = &ip_rt_gc_min_interval, |
3157 | .maxlen = sizeof(int), | 3167 | .maxlen = sizeof(int), |
3158 | .mode = 0644, | 3168 | .mode = 0644, |
3159 | .proc_handler = proc_dointvec_jiffies, | 3169 | .proc_handler = proc_dointvec_jiffies, |
3160 | .strategy = sysctl_jiffies, | ||
3161 | }, | 3170 | }, |
3162 | { | 3171 | { |
3163 | .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, | ||
3164 | .procname = "gc_min_interval_ms", | 3172 | .procname = "gc_min_interval_ms", |
3165 | .data = &ip_rt_gc_min_interval, | 3173 | .data = &ip_rt_gc_min_interval, |
3166 | .maxlen = sizeof(int), | 3174 | .maxlen = sizeof(int), |
3167 | .mode = 0644, | 3175 | .mode = 0644, |
3168 | .proc_handler = proc_dointvec_ms_jiffies, | 3176 | .proc_handler = proc_dointvec_ms_jiffies, |
3169 | .strategy = sysctl_ms_jiffies, | ||
3170 | }, | 3177 | }, |
3171 | { | 3178 | { |
3172 | .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, | ||
3173 | .procname = "gc_timeout", | 3179 | .procname = "gc_timeout", |
3174 | .data = &ip_rt_gc_timeout, | 3180 | .data = &ip_rt_gc_timeout, |
3175 | .maxlen = sizeof(int), | 3181 | .maxlen = sizeof(int), |
3176 | .mode = 0644, | 3182 | .mode = 0644, |
3177 | .proc_handler = proc_dointvec_jiffies, | 3183 | .proc_handler = proc_dointvec_jiffies, |
3178 | .strategy = sysctl_jiffies, | ||
3179 | }, | 3184 | }, |
3180 | { | 3185 | { |
3181 | .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, | ||
3182 | .procname = "gc_interval", | 3186 | .procname = "gc_interval", |
3183 | .data = &ip_rt_gc_interval, | 3187 | .data = &ip_rt_gc_interval, |
3184 | .maxlen = sizeof(int), | 3188 | .maxlen = sizeof(int), |
3185 | .mode = 0644, | 3189 | .mode = 0644, |
3186 | .proc_handler = proc_dointvec_jiffies, | 3190 | .proc_handler = proc_dointvec_jiffies, |
3187 | .strategy = sysctl_jiffies, | ||
3188 | }, | 3191 | }, |
3189 | { | 3192 | { |
3190 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, | ||
3191 | .procname = "redirect_load", | 3193 | .procname = "redirect_load", |
3192 | .data = &ip_rt_redirect_load, | 3194 | .data = &ip_rt_redirect_load, |
3193 | .maxlen = sizeof(int), | 3195 | .maxlen = sizeof(int), |
@@ -3195,7 +3197,6 @@ static ctl_table ipv4_route_table[] = { | |||
3195 | .proc_handler = proc_dointvec, | 3197 | .proc_handler = proc_dointvec, |
3196 | }, | 3198 | }, |
3197 | { | 3199 | { |
3198 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, | ||
3199 | .procname = "redirect_number", | 3200 | .procname = "redirect_number", |
3200 | .data = &ip_rt_redirect_number, | 3201 | .data = &ip_rt_redirect_number, |
3201 | .maxlen = sizeof(int), | 3202 | .maxlen = sizeof(int), |
@@ -3203,7 +3204,6 @@ static ctl_table ipv4_route_table[] = { | |||
3203 | .proc_handler = proc_dointvec, | 3204 | .proc_handler = proc_dointvec, |
3204 | }, | 3205 | }, |
3205 | { | 3206 | { |
3206 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, | ||
3207 | .procname = "redirect_silence", | 3207 | .procname = "redirect_silence", |
3208 | .data = &ip_rt_redirect_silence, | 3208 | .data = &ip_rt_redirect_silence, |
3209 | .maxlen = sizeof(int), | 3209 | .maxlen = sizeof(int), |
@@ -3211,7 +3211,6 @@ static ctl_table ipv4_route_table[] = { | |||
3211 | .proc_handler = proc_dointvec, | 3211 | .proc_handler = proc_dointvec, |
3212 | }, | 3212 | }, |
3213 | { | 3213 | { |
3214 | .ctl_name = NET_IPV4_ROUTE_ERROR_COST, | ||
3215 | .procname = "error_cost", | 3214 | .procname = "error_cost", |
3216 | .data = &ip_rt_error_cost, | 3215 | .data = &ip_rt_error_cost, |
3217 | .maxlen = sizeof(int), | 3216 | .maxlen = sizeof(int), |
@@ -3219,7 +3218,6 @@ static ctl_table ipv4_route_table[] = { | |||
3219 | .proc_handler = proc_dointvec, | 3218 | .proc_handler = proc_dointvec, |
3220 | }, | 3219 | }, |
3221 | { | 3220 | { |
3222 | .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, | ||
3223 | .procname = "error_burst", | 3221 | .procname = "error_burst", |
3224 | .data = &ip_rt_error_burst, | 3222 | .data = &ip_rt_error_burst, |
3225 | .maxlen = sizeof(int), | 3223 | .maxlen = sizeof(int), |
@@ -3227,7 +3225,6 @@ static ctl_table ipv4_route_table[] = { | |||
3227 | .proc_handler = proc_dointvec, | 3225 | .proc_handler = proc_dointvec, |
3228 | }, | 3226 | }, |
3229 | { | 3227 | { |
3230 | .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, | ||
3231 | .procname = "gc_elasticity", | 3228 | .procname = "gc_elasticity", |
3232 | .data = &ip_rt_gc_elasticity, | 3229 | .data = &ip_rt_gc_elasticity, |
3233 | .maxlen = sizeof(int), | 3230 | .maxlen = sizeof(int), |
@@ -3235,16 +3232,13 @@ static ctl_table ipv4_route_table[] = { | |||
3235 | .proc_handler = proc_dointvec, | 3232 | .proc_handler = proc_dointvec, |
3236 | }, | 3233 | }, |
3237 | { | 3234 | { |
3238 | .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, | ||
3239 | .procname = "mtu_expires", | 3235 | .procname = "mtu_expires", |
3240 | .data = &ip_rt_mtu_expires, | 3236 | .data = &ip_rt_mtu_expires, |
3241 | .maxlen = sizeof(int), | 3237 | .maxlen = sizeof(int), |
3242 | .mode = 0644, | 3238 | .mode = 0644, |
3243 | .proc_handler = proc_dointvec_jiffies, | 3239 | .proc_handler = proc_dointvec_jiffies, |
3244 | .strategy = sysctl_jiffies, | ||
3245 | }, | 3240 | }, |
3246 | { | 3241 | { |
3247 | .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, | ||
3248 | .procname = "min_pmtu", | 3242 | .procname = "min_pmtu", |
3249 | .data = &ip_rt_min_pmtu, | 3243 | .data = &ip_rt_min_pmtu, |
3250 | .maxlen = sizeof(int), | 3244 | .maxlen = sizeof(int), |
@@ -3252,7 +3246,6 @@ static ctl_table ipv4_route_table[] = { | |||
3252 | .proc_handler = proc_dointvec, | 3246 | .proc_handler = proc_dointvec, |
3253 | }, | 3247 | }, |
3254 | { | 3248 | { |
3255 | .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, | ||
3256 | .procname = "min_adv_mss", | 3249 | .procname = "min_adv_mss", |
3257 | .data = &ip_rt_min_advmss, | 3250 | .data = &ip_rt_min_advmss, |
3258 | .maxlen = sizeof(int), | 3251 | .maxlen = sizeof(int), |
@@ -3260,50 +3253,46 @@ static ctl_table ipv4_route_table[] = { | |||
3260 | .proc_handler = proc_dointvec, | 3253 | .proc_handler = proc_dointvec, |
3261 | }, | 3254 | }, |
3262 | { | 3255 | { |
3263 | .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, | ||
3264 | .procname = "secret_interval", | 3256 | .procname = "secret_interval", |
3265 | .data = &ip_rt_secret_interval, | 3257 | .data = &ip_rt_secret_interval, |
3266 | .maxlen = sizeof(int), | 3258 | .maxlen = sizeof(int), |
3267 | .mode = 0644, | 3259 | .mode = 0644, |
3268 | .proc_handler = ipv4_sysctl_rt_secret_interval, | 3260 | .proc_handler = ipv4_sysctl_rt_secret_interval, |
3269 | .strategy = ipv4_sysctl_rt_secret_interval_strategy, | ||
3270 | }, | 3261 | }, |
3271 | { .ctl_name = 0 } | 3262 | { } |
3272 | }; | 3263 | }; |
3273 | 3264 | ||
3274 | static struct ctl_table empty[1]; | 3265 | static struct ctl_table empty[1]; |
3275 | 3266 | ||
3276 | static struct ctl_table ipv4_skeleton[] = | 3267 | static struct ctl_table ipv4_skeleton[] = |
3277 | { | 3268 | { |
3278 | { .procname = "route", .ctl_name = NET_IPV4_ROUTE, | 3269 | { .procname = "route", |
3279 | .mode = 0555, .child = ipv4_route_table}, | 3270 | .mode = 0555, .child = ipv4_route_table}, |
3280 | { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, | 3271 | { .procname = "neigh", |
3281 | .mode = 0555, .child = empty}, | 3272 | .mode = 0555, .child = empty}, |
3282 | { } | 3273 | { } |
3283 | }; | 3274 | }; |
3284 | 3275 | ||
3285 | static __net_initdata struct ctl_path ipv4_path[] = { | 3276 | static __net_initdata struct ctl_path ipv4_path[] = { |
3286 | { .procname = "net", .ctl_name = CTL_NET, }, | 3277 | { .procname = "net", }, |
3287 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 3278 | { .procname = "ipv4", }, |
3288 | { }, | 3279 | { }, |
3289 | }; | 3280 | }; |
3290 | 3281 | ||
3291 | static struct ctl_table ipv4_route_flush_table[] = { | 3282 | static struct ctl_table ipv4_route_flush_table[] = { |
3292 | { | 3283 | { |
3293 | .ctl_name = NET_IPV4_ROUTE_FLUSH, | ||
3294 | .procname = "flush", | 3284 | .procname = "flush", |
3295 | .maxlen = sizeof(int), | 3285 | .maxlen = sizeof(int), |
3296 | .mode = 0200, | 3286 | .mode = 0200, |
3297 | .proc_handler = ipv4_sysctl_rtcache_flush, | 3287 | .proc_handler = ipv4_sysctl_rtcache_flush, |
3298 | .strategy = ipv4_sysctl_rtcache_flush_strategy, | ||
3299 | }, | 3288 | }, |
3300 | { .ctl_name = 0 }, | 3289 | { }, |
3301 | }; | 3290 | }; |
3302 | 3291 | ||
3303 | static __net_initdata struct ctl_path ipv4_route_path[] = { | 3292 | static __net_initdata struct ctl_path ipv4_route_path[] = { |
3304 | { .procname = "net", .ctl_name = CTL_NET, }, | 3293 | { .procname = "net", }, |
3305 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 3294 | { .procname = "ipv4", }, |
3306 | { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, | 3295 | { .procname = "route", }, |
3307 | { }, | 3296 | { }, |
3308 | }; | 3297 | }; |
3309 | 3298 | ||
@@ -3312,7 +3301,7 @@ static __net_init int sysctl_route_net_init(struct net *net) | |||
3312 | struct ctl_table *tbl; | 3301 | struct ctl_table *tbl; |
3313 | 3302 | ||
3314 | tbl = ipv4_route_flush_table; | 3303 | tbl = ipv4_route_flush_table; |
3315 | if (net != &init_net) { | 3304 | if (!net_eq(net, &init_net)) { |
3316 | tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); | 3305 | tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); |
3317 | if (tbl == NULL) | 3306 | if (tbl == NULL) |
3318 | goto err_dup; | 3307 | goto err_dup; |
@@ -3380,7 +3369,7 @@ static __net_initdata struct pernet_operations rt_secret_timer_ops = { | |||
3380 | 3369 | ||
3381 | 3370 | ||
3382 | #ifdef CONFIG_NET_CLS_ROUTE | 3371 | #ifdef CONFIG_NET_CLS_ROUTE |
3383 | struct ip_rt_acct *ip_rt_acct __read_mostly; | 3372 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3384 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3373 | #endif /* CONFIG_NET_CLS_ROUTE */ |
3385 | 3374 | ||
3386 | static __initdata unsigned long rhash_entries; | 3375 | static __initdata unsigned long rhash_entries; |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a6e0e077ac33..5c24db4a3c91 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp); | |||
253 | struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | 253 | struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, |
254 | struct ip_options *opt) | 254 | struct ip_options *opt) |
255 | { | 255 | { |
256 | struct tcp_options_received tcp_opt; | ||
257 | u8 *hash_location; | ||
256 | struct inet_request_sock *ireq; | 258 | struct inet_request_sock *ireq; |
257 | struct tcp_request_sock *treq; | 259 | struct tcp_request_sock *treq; |
258 | struct tcp_sock *tp = tcp_sk(sk); | 260 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
263 | int mss; | 265 | int mss; |
264 | struct rtable *rt; | 266 | struct rtable *rt; |
265 | __u8 rcv_wscale; | 267 | __u8 rcv_wscale; |
266 | struct tcp_options_received tcp_opt; | ||
267 | 268 | ||
268 | if (!sysctl_tcp_syncookies || !th->ack) | 269 | if (!sysctl_tcp_syncookies || !th->ack) |
269 | goto out; | 270 | goto out; |
@@ -278,7 +279,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
278 | 279 | ||
279 | /* check for timestamp cookie support */ | 280 | /* check for timestamp cookie support */ |
280 | memset(&tcp_opt, 0, sizeof(tcp_opt)); | 281 | memset(&tcp_opt, 0, sizeof(tcp_opt)); |
281 | tcp_parse_options(skb, &tcp_opt, 0); | 282 | tcp_parse_options(skb, &tcp_opt, &hash_location, 0); |
282 | 283 | ||
283 | if (tcp_opt.saw_tstamp) | 284 | if (tcp_opt.saw_tstamp) |
284 | cookie_check_timestamp(&tcp_opt); | 285 | cookie_check_timestamp(&tcp_opt); |
@@ -333,7 +334,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
333 | * no easy way to do this. | 334 | * no easy way to do this. |
334 | */ | 335 | */ |
335 | { | 336 | { |
336 | struct flowi fl = { .nl_u = { .ip4_u = | 337 | struct flowi fl = { .mark = sk->sk_mark, |
338 | .nl_u = { .ip4_u = | ||
337 | { .daddr = ((opt && opt->srr) ? | 339 | { .daddr = ((opt && opt->srr) ? |
338 | opt->faddr : | 340 | opt->faddr : |
339 | ireq->rmt_addr), | 341 | ireq->rmt_addr), |
@@ -356,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
356 | 358 | ||
357 | tcp_select_initial_window(tcp_full_space(sk), req->mss, | 359 | tcp_select_initial_window(tcp_full_space(sk), req->mss, |
358 | &req->rcv_wnd, &req->window_clamp, | 360 | &req->rcv_wnd, &req->window_clamp, |
359 | ireq->wscale_ok, &rcv_wscale); | 361 | ireq->wscale_ok, &rcv_wscale, |
362 | dst_metric(&rt->u.dst, RTAX_INITRWND)); | ||
360 | 363 | ||
361 | ireq->rcv_wscale = rcv_wscale; | 364 | ireq->rcv_wscale = rcv_wscale; |
362 | 365 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2dcf04d9b005..1cd5c15174b8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/inetdevice.h> | 12 | #include <linux/inetdevice.h> |
13 | #include <linux/seqlock.h> | 13 | #include <linux/seqlock.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/slab.h> | ||
15 | #include <net/snmp.h> | 16 | #include <net/snmp.h> |
16 | #include <net/icmp.h> | 17 | #include <net/icmp.h> |
17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
@@ -63,34 +64,6 @@ static int ipv4_local_port_range(ctl_table *table, int write, | |||
63 | return ret; | 64 | return ret; |
64 | } | 65 | } |
65 | 66 | ||
66 | /* Validate changes from sysctl interface. */ | ||
67 | static int ipv4_sysctl_local_port_range(ctl_table *table, | ||
68 | void __user *oldval, | ||
69 | size_t __user *oldlenp, | ||
70 | void __user *newval, size_t newlen) | ||
71 | { | ||
72 | int ret; | ||
73 | int range[2]; | ||
74 | ctl_table tmp = { | ||
75 | .data = &range, | ||
76 | .maxlen = sizeof(range), | ||
77 | .mode = table->mode, | ||
78 | .extra1 = &ip_local_port_range_min, | ||
79 | .extra2 = &ip_local_port_range_max, | ||
80 | }; | ||
81 | |||
82 | inet_get_local_port_range(range, range + 1); | ||
83 | ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); | ||
84 | if (ret == 0 && newval && newlen) { | ||
85 | if (range[1] < range[0]) | ||
86 | ret = -EINVAL; | ||
87 | else | ||
88 | set_local_port_range(range); | ||
89 | } | ||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | |||
94 | static int proc_tcp_congestion_control(ctl_table *ctl, int write, | 67 | static int proc_tcp_congestion_control(ctl_table *ctl, int write, |
95 | void __user *buffer, size_t *lenp, loff_t *ppos) | 68 | void __user *buffer, size_t *lenp, loff_t *ppos) |
96 | { | 69 | { |
@@ -109,25 +82,6 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, | |||
109 | return ret; | 82 | return ret; |
110 | } | 83 | } |
111 | 84 | ||
112 | static int sysctl_tcp_congestion_control(ctl_table *table, | ||
113 | void __user *oldval, | ||
114 | size_t __user *oldlenp, | ||
115 | void __user *newval, size_t newlen) | ||
116 | { | ||
117 | char val[TCP_CA_NAME_MAX]; | ||
118 | ctl_table tbl = { | ||
119 | .data = val, | ||
120 | .maxlen = TCP_CA_NAME_MAX, | ||
121 | }; | ||
122 | int ret; | ||
123 | |||
124 | tcp_get_default_congestion_control(val); | ||
125 | ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); | ||
126 | if (ret == 1 && newval && newlen) | ||
127 | ret = tcp_set_default_congestion_control(val); | ||
128 | return ret; | ||
129 | } | ||
130 | |||
131 | static int proc_tcp_available_congestion_control(ctl_table *ctl, | 85 | static int proc_tcp_available_congestion_control(ctl_table *ctl, |
132 | int write, | 86 | int write, |
133 | void __user *buffer, size_t *lenp, | 87 | void __user *buffer, size_t *lenp, |
@@ -165,32 +119,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, | |||
165 | return ret; | 119 | return ret; |
166 | } | 120 | } |
167 | 121 | ||
168 | static int strategy_allowed_congestion_control(ctl_table *table, | ||
169 | void __user *oldval, | ||
170 | size_t __user *oldlenp, | ||
171 | void __user *newval, | ||
172 | size_t newlen) | ||
173 | { | ||
174 | ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; | ||
175 | int ret; | ||
176 | |||
177 | tbl.data = kmalloc(tbl.maxlen, GFP_USER); | ||
178 | if (!tbl.data) | ||
179 | return -ENOMEM; | ||
180 | |||
181 | tcp_get_available_congestion_control(tbl.data, tbl.maxlen); | ||
182 | ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); | ||
183 | if (ret == 1 && newval && newlen) | ||
184 | ret = tcp_set_allowed_congestion_control(tbl.data); | ||
185 | kfree(tbl.data); | ||
186 | |||
187 | return ret; | ||
188 | |||
189 | } | ||
190 | |||
191 | static struct ctl_table ipv4_table[] = { | 122 | static struct ctl_table ipv4_table[] = { |
192 | { | 123 | { |
193 | .ctl_name = NET_IPV4_TCP_TIMESTAMPS, | ||
194 | .procname = "tcp_timestamps", | 124 | .procname = "tcp_timestamps", |
195 | .data = &sysctl_tcp_timestamps, | 125 | .data = &sysctl_tcp_timestamps, |
196 | .maxlen = sizeof(int), | 126 | .maxlen = sizeof(int), |
@@ -198,7 +128,6 @@ static struct ctl_table ipv4_table[] = { | |||
198 | .proc_handler = proc_dointvec | 128 | .proc_handler = proc_dointvec |
199 | }, | 129 | }, |
200 | { | 130 | { |
201 | .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, | ||
202 | .procname = "tcp_window_scaling", | 131 | .procname = "tcp_window_scaling", |
203 | .data = &sysctl_tcp_window_scaling, | 132 | .data = &sysctl_tcp_window_scaling, |
204 | .maxlen = sizeof(int), | 133 | .maxlen = sizeof(int), |
@@ -206,7 +135,6 @@ static struct ctl_table ipv4_table[] = { | |||
206 | .proc_handler = proc_dointvec | 135 | .proc_handler = proc_dointvec |
207 | }, | 136 | }, |
208 | { | 137 | { |
209 | .ctl_name = NET_IPV4_TCP_SACK, | ||
210 | .procname = "tcp_sack", | 138 | .procname = "tcp_sack", |
211 | .data = &sysctl_tcp_sack, | 139 | .data = &sysctl_tcp_sack, |
212 | .maxlen = sizeof(int), | 140 | .maxlen = sizeof(int), |
@@ -214,7 +142,6 @@ static struct ctl_table ipv4_table[] = { | |||
214 | .proc_handler = proc_dointvec | 142 | .proc_handler = proc_dointvec |
215 | }, | 143 | }, |
216 | { | 144 | { |
217 | .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, | ||
218 | .procname = "tcp_retrans_collapse", | 145 | .procname = "tcp_retrans_collapse", |
219 | .data = &sysctl_tcp_retrans_collapse, | 146 | .data = &sysctl_tcp_retrans_collapse, |
220 | .maxlen = sizeof(int), | 147 | .maxlen = sizeof(int), |
@@ -222,17 +149,14 @@ static struct ctl_table ipv4_table[] = { | |||
222 | .proc_handler = proc_dointvec | 149 | .proc_handler = proc_dointvec |
223 | }, | 150 | }, |
224 | { | 151 | { |
225 | .ctl_name = NET_IPV4_DEFAULT_TTL, | ||
226 | .procname = "ip_default_ttl", | 152 | .procname = "ip_default_ttl", |
227 | .data = &sysctl_ip_default_ttl, | 153 | .data = &sysctl_ip_default_ttl, |
228 | .maxlen = sizeof(int), | 154 | .maxlen = sizeof(int), |
229 | .mode = 0644, | 155 | .mode = 0644, |
230 | .proc_handler = ipv4_doint_and_flush, | 156 | .proc_handler = ipv4_doint_and_flush, |
231 | .strategy = ipv4_doint_and_flush_strategy, | ||
232 | .extra2 = &init_net, | 157 | .extra2 = &init_net, |
233 | }, | 158 | }, |
234 | { | 159 | { |
235 | .ctl_name = NET_IPV4_NO_PMTU_DISC, | ||
236 | .procname = "ip_no_pmtu_disc", | 160 | .procname = "ip_no_pmtu_disc", |
237 | .data = &ipv4_config.no_pmtu_disc, | 161 | .data = &ipv4_config.no_pmtu_disc, |
238 | .maxlen = sizeof(int), | 162 | .maxlen = sizeof(int), |
@@ -240,7 +164,6 @@ static struct ctl_table ipv4_table[] = { | |||
240 | .proc_handler = proc_dointvec | 164 | .proc_handler = proc_dointvec |
241 | }, | 165 | }, |
242 | { | 166 | { |
243 | .ctl_name = NET_IPV4_NONLOCAL_BIND, | ||
244 | .procname = "ip_nonlocal_bind", | 167 | .procname = "ip_nonlocal_bind", |
245 | .data = &sysctl_ip_nonlocal_bind, | 168 | .data = &sysctl_ip_nonlocal_bind, |
246 | .maxlen = sizeof(int), | 169 | .maxlen = sizeof(int), |
@@ -248,7 +171,6 @@ static struct ctl_table ipv4_table[] = { | |||
248 | .proc_handler = proc_dointvec | 171 | .proc_handler = proc_dointvec |
249 | }, | 172 | }, |
250 | { | 173 | { |
251 | .ctl_name = NET_IPV4_TCP_SYN_RETRIES, | ||
252 | .procname = "tcp_syn_retries", | 174 | .procname = "tcp_syn_retries", |
253 | .data = &sysctl_tcp_syn_retries, | 175 | .data = &sysctl_tcp_syn_retries, |
254 | .maxlen = sizeof(int), | 176 | .maxlen = sizeof(int), |
@@ -256,7 +178,6 @@ static struct ctl_table ipv4_table[] = { | |||
256 | .proc_handler = proc_dointvec | 178 | .proc_handler = proc_dointvec |
257 | }, | 179 | }, |
258 | { | 180 | { |
259 | .ctl_name = NET_TCP_SYNACK_RETRIES, | ||
260 | .procname = "tcp_synack_retries", | 181 | .procname = "tcp_synack_retries", |
261 | .data = &sysctl_tcp_synack_retries, | 182 | .data = &sysctl_tcp_synack_retries, |
262 | .maxlen = sizeof(int), | 183 | .maxlen = sizeof(int), |
@@ -264,7 +185,6 @@ static struct ctl_table ipv4_table[] = { | |||
264 | .proc_handler = proc_dointvec | 185 | .proc_handler = proc_dointvec |
265 | }, | 186 | }, |
266 | { | 187 | { |
267 | .ctl_name = NET_TCP_MAX_ORPHANS, | ||
268 | .procname = "tcp_max_orphans", | 188 | .procname = "tcp_max_orphans", |
269 | .data = &sysctl_tcp_max_orphans, | 189 | .data = &sysctl_tcp_max_orphans, |
270 | .maxlen = sizeof(int), | 190 | .maxlen = sizeof(int), |
@@ -272,7 +192,6 @@ static struct ctl_table ipv4_table[] = { | |||
272 | .proc_handler = proc_dointvec | 192 | .proc_handler = proc_dointvec |
273 | }, | 193 | }, |
274 | { | 194 | { |
275 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, | ||
276 | .procname = "tcp_max_tw_buckets", | 195 | .procname = "tcp_max_tw_buckets", |
277 | .data = &tcp_death_row.sysctl_max_tw_buckets, | 196 | .data = &tcp_death_row.sysctl_max_tw_buckets, |
278 | .maxlen = sizeof(int), | 197 | .maxlen = sizeof(int), |
@@ -280,7 +199,6 @@ static struct ctl_table ipv4_table[] = { | |||
280 | .proc_handler = proc_dointvec | 199 | .proc_handler = proc_dointvec |
281 | }, | 200 | }, |
282 | { | 201 | { |
283 | .ctl_name = NET_IPV4_DYNADDR, | ||
284 | .procname = "ip_dynaddr", | 202 | .procname = "ip_dynaddr", |
285 | .data = &sysctl_ip_dynaddr, | 203 | .data = &sysctl_ip_dynaddr, |
286 | .maxlen = sizeof(int), | 204 | .maxlen = sizeof(int), |
@@ -288,16 +206,13 @@ static struct ctl_table ipv4_table[] = { | |||
288 | .proc_handler = proc_dointvec | 206 | .proc_handler = proc_dointvec |
289 | }, | 207 | }, |
290 | { | 208 | { |
291 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, | ||
292 | .procname = "tcp_keepalive_time", | 209 | .procname = "tcp_keepalive_time", |
293 | .data = &sysctl_tcp_keepalive_time, | 210 | .data = &sysctl_tcp_keepalive_time, |
294 | .maxlen = sizeof(int), | 211 | .maxlen = sizeof(int), |
295 | .mode = 0644, | 212 | .mode = 0644, |
296 | .proc_handler = proc_dointvec_jiffies, | 213 | .proc_handler = proc_dointvec_jiffies, |
297 | .strategy = sysctl_jiffies | ||
298 | }, | 214 | }, |
299 | { | 215 | { |
300 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, | ||
301 | .procname = "tcp_keepalive_probes", | 216 | .procname = "tcp_keepalive_probes", |
302 | .data = &sysctl_tcp_keepalive_probes, | 217 | .data = &sysctl_tcp_keepalive_probes, |
303 | .maxlen = sizeof(int), | 218 | .maxlen = sizeof(int), |
@@ -305,26 +220,21 @@ static struct ctl_table ipv4_table[] = { | |||
305 | .proc_handler = proc_dointvec | 220 | .proc_handler = proc_dointvec |
306 | }, | 221 | }, |
307 | { | 222 | { |
308 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, | ||
309 | .procname = "tcp_keepalive_intvl", | 223 | .procname = "tcp_keepalive_intvl", |
310 | .data = &sysctl_tcp_keepalive_intvl, | 224 | .data = &sysctl_tcp_keepalive_intvl, |
311 | .maxlen = sizeof(int), | 225 | .maxlen = sizeof(int), |
312 | .mode = 0644, | 226 | .mode = 0644, |
313 | .proc_handler = proc_dointvec_jiffies, | 227 | .proc_handler = proc_dointvec_jiffies, |
314 | .strategy = sysctl_jiffies | ||
315 | }, | 228 | }, |
316 | { | 229 | { |
317 | .ctl_name = NET_IPV4_TCP_RETRIES1, | ||
318 | .procname = "tcp_retries1", | 230 | .procname = "tcp_retries1", |
319 | .data = &sysctl_tcp_retries1, | 231 | .data = &sysctl_tcp_retries1, |
320 | .maxlen = sizeof(int), | 232 | .maxlen = sizeof(int), |
321 | .mode = 0644, | 233 | .mode = 0644, |
322 | .proc_handler = proc_dointvec_minmax, | 234 | .proc_handler = proc_dointvec_minmax, |
323 | .strategy = sysctl_intvec, | ||
324 | .extra2 = &tcp_retr1_max | 235 | .extra2 = &tcp_retr1_max |
325 | }, | 236 | }, |
326 | { | 237 | { |
327 | .ctl_name = NET_IPV4_TCP_RETRIES2, | ||
328 | .procname = "tcp_retries2", | 238 | .procname = "tcp_retries2", |
329 | .data = &sysctl_tcp_retries2, | 239 | .data = &sysctl_tcp_retries2, |
330 | .maxlen = sizeof(int), | 240 | .maxlen = sizeof(int), |
@@ -332,17 +242,14 @@ static struct ctl_table ipv4_table[] = { | |||
332 | .proc_handler = proc_dointvec | 242 | .proc_handler = proc_dointvec |
333 | }, | 243 | }, |
334 | { | 244 | { |
335 | .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, | ||
336 | .procname = "tcp_fin_timeout", | 245 | .procname = "tcp_fin_timeout", |
337 | .data = &sysctl_tcp_fin_timeout, | 246 | .data = &sysctl_tcp_fin_timeout, |
338 | .maxlen = sizeof(int), | 247 | .maxlen = sizeof(int), |
339 | .mode = 0644, | 248 | .mode = 0644, |
340 | .proc_handler = proc_dointvec_jiffies, | 249 | .proc_handler = proc_dointvec_jiffies, |
341 | .strategy = sysctl_jiffies | ||
342 | }, | 250 | }, |
343 | #ifdef CONFIG_SYN_COOKIES | 251 | #ifdef CONFIG_SYN_COOKIES |
344 | { | 252 | { |
345 | .ctl_name = NET_TCP_SYNCOOKIES, | ||
346 | .procname = "tcp_syncookies", | 253 | .procname = "tcp_syncookies", |
347 | .data = &sysctl_tcp_syncookies, | 254 | .data = &sysctl_tcp_syncookies, |
348 | .maxlen = sizeof(int), | 255 | .maxlen = sizeof(int), |
@@ -351,7 +258,6 @@ static struct ctl_table ipv4_table[] = { | |||
351 | }, | 258 | }, |
352 | #endif | 259 | #endif |
353 | { | 260 | { |
354 | .ctl_name = NET_TCP_TW_RECYCLE, | ||
355 | .procname = "tcp_tw_recycle", | 261 | .procname = "tcp_tw_recycle", |
356 | .data = &tcp_death_row.sysctl_tw_recycle, | 262 | .data = &tcp_death_row.sysctl_tw_recycle, |
357 | .maxlen = sizeof(int), | 263 | .maxlen = sizeof(int), |
@@ -359,7 +265,6 @@ static struct ctl_table ipv4_table[] = { | |||
359 | .proc_handler = proc_dointvec | 265 | .proc_handler = proc_dointvec |
360 | }, | 266 | }, |
361 | { | 267 | { |
362 | .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, | ||
363 | .procname = "tcp_abort_on_overflow", | 268 | .procname = "tcp_abort_on_overflow", |
364 | .data = &sysctl_tcp_abort_on_overflow, | 269 | .data = &sysctl_tcp_abort_on_overflow, |
365 | .maxlen = sizeof(int), | 270 | .maxlen = sizeof(int), |
@@ -367,7 +272,6 @@ static struct ctl_table ipv4_table[] = { | |||
367 | .proc_handler = proc_dointvec | 272 | .proc_handler = proc_dointvec |
368 | }, | 273 | }, |
369 | { | 274 | { |
370 | .ctl_name = NET_TCP_STDURG, | ||
371 | .procname = "tcp_stdurg", | 275 | .procname = "tcp_stdurg", |
372 | .data = &sysctl_tcp_stdurg, | 276 | .data = &sysctl_tcp_stdurg, |
373 | .maxlen = sizeof(int), | 277 | .maxlen = sizeof(int), |
@@ -375,7 +279,6 @@ static struct ctl_table ipv4_table[] = { | |||
375 | .proc_handler = proc_dointvec | 279 | .proc_handler = proc_dointvec |
376 | }, | 280 | }, |
377 | { | 281 | { |
378 | .ctl_name = NET_TCP_RFC1337, | ||
379 | .procname = "tcp_rfc1337", | 282 | .procname = "tcp_rfc1337", |
380 | .data = &sysctl_tcp_rfc1337, | 283 | .data = &sysctl_tcp_rfc1337, |
381 | .maxlen = sizeof(int), | 284 | .maxlen = sizeof(int), |
@@ -383,7 +286,6 @@ static struct ctl_table ipv4_table[] = { | |||
383 | .proc_handler = proc_dointvec | 286 | .proc_handler = proc_dointvec |
384 | }, | 287 | }, |
385 | { | 288 | { |
386 | .ctl_name = NET_TCP_MAX_SYN_BACKLOG, | ||
387 | .procname = "tcp_max_syn_backlog", | 289 | .procname = "tcp_max_syn_backlog", |
388 | .data = &sysctl_max_syn_backlog, | 290 | .data = &sysctl_max_syn_backlog, |
389 | .maxlen = sizeof(int), | 291 | .maxlen = sizeof(int), |
@@ -391,17 +293,14 @@ static struct ctl_table ipv4_table[] = { | |||
391 | .proc_handler = proc_dointvec | 293 | .proc_handler = proc_dointvec |
392 | }, | 294 | }, |
393 | { | 295 | { |
394 | .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, | ||
395 | .procname = "ip_local_port_range", | 296 | .procname = "ip_local_port_range", |
396 | .data = &sysctl_local_ports.range, | 297 | .data = &sysctl_local_ports.range, |
397 | .maxlen = sizeof(sysctl_local_ports.range), | 298 | .maxlen = sizeof(sysctl_local_ports.range), |
398 | .mode = 0644, | 299 | .mode = 0644, |
399 | .proc_handler = ipv4_local_port_range, | 300 | .proc_handler = ipv4_local_port_range, |
400 | .strategy = ipv4_sysctl_local_port_range, | ||
401 | }, | 301 | }, |
402 | #ifdef CONFIG_IP_MULTICAST | 302 | #ifdef CONFIG_IP_MULTICAST |
403 | { | 303 | { |
404 | .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, | ||
405 | .procname = "igmp_max_memberships", | 304 | .procname = "igmp_max_memberships", |
406 | .data = &sysctl_igmp_max_memberships, | 305 | .data = &sysctl_igmp_max_memberships, |
407 | .maxlen = sizeof(int), | 306 | .maxlen = sizeof(int), |
@@ -411,7 +310,6 @@ static struct ctl_table ipv4_table[] = { | |||
411 | 310 | ||
412 | #endif | 311 | #endif |
413 | { | 312 | { |
414 | .ctl_name = NET_IPV4_IGMP_MAX_MSF, | ||
415 | .procname = "igmp_max_msf", | 313 | .procname = "igmp_max_msf", |
416 | .data = &sysctl_igmp_max_msf, | 314 | .data = &sysctl_igmp_max_msf, |
417 | .maxlen = sizeof(int), | 315 | .maxlen = sizeof(int), |
@@ -419,7 +317,6 @@ static struct ctl_table ipv4_table[] = { | |||
419 | .proc_handler = proc_dointvec | 317 | .proc_handler = proc_dointvec |
420 | }, | 318 | }, |
421 | { | 319 | { |
422 | .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, | ||
423 | .procname = "inet_peer_threshold", | 320 | .procname = "inet_peer_threshold", |
424 | .data = &inet_peer_threshold, | 321 | .data = &inet_peer_threshold, |
425 | .maxlen = sizeof(int), | 322 | .maxlen = sizeof(int), |
@@ -427,43 +324,34 @@ static struct ctl_table ipv4_table[] = { | |||
427 | .proc_handler = proc_dointvec | 324 | .proc_handler = proc_dointvec |
428 | }, | 325 | }, |
429 | { | 326 | { |
430 | .ctl_name = NET_IPV4_INET_PEER_MINTTL, | ||
431 | .procname = "inet_peer_minttl", | 327 | .procname = "inet_peer_minttl", |
432 | .data = &inet_peer_minttl, | 328 | .data = &inet_peer_minttl, |
433 | .maxlen = sizeof(int), | 329 | .maxlen = sizeof(int), |
434 | .mode = 0644, | 330 | .mode = 0644, |
435 | .proc_handler = proc_dointvec_jiffies, | 331 | .proc_handler = proc_dointvec_jiffies, |
436 | .strategy = sysctl_jiffies | ||
437 | }, | 332 | }, |
438 | { | 333 | { |
439 | .ctl_name = NET_IPV4_INET_PEER_MAXTTL, | ||
440 | .procname = "inet_peer_maxttl", | 334 | .procname = "inet_peer_maxttl", |
441 | .data = &inet_peer_maxttl, | 335 | .data = &inet_peer_maxttl, |
442 | .maxlen = sizeof(int), | 336 | .maxlen = sizeof(int), |
443 | .mode = 0644, | 337 | .mode = 0644, |
444 | .proc_handler = proc_dointvec_jiffies, | 338 | .proc_handler = proc_dointvec_jiffies, |
445 | .strategy = sysctl_jiffies | ||
446 | }, | 339 | }, |
447 | { | 340 | { |
448 | .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, | ||
449 | .procname = "inet_peer_gc_mintime", | 341 | .procname = "inet_peer_gc_mintime", |
450 | .data = &inet_peer_gc_mintime, | 342 | .data = &inet_peer_gc_mintime, |
451 | .maxlen = sizeof(int), | 343 | .maxlen = sizeof(int), |
452 | .mode = 0644, | 344 | .mode = 0644, |
453 | .proc_handler = proc_dointvec_jiffies, | 345 | .proc_handler = proc_dointvec_jiffies, |
454 | .strategy = sysctl_jiffies | ||
455 | }, | 346 | }, |
456 | { | 347 | { |
457 | .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, | ||
458 | .procname = "inet_peer_gc_maxtime", | 348 | .procname = "inet_peer_gc_maxtime", |
459 | .data = &inet_peer_gc_maxtime, | 349 | .data = &inet_peer_gc_maxtime, |
460 | .maxlen = sizeof(int), | 350 | .maxlen = sizeof(int), |
461 | .mode = 0644, | 351 | .mode = 0644, |
462 | .proc_handler = proc_dointvec_jiffies, | 352 | .proc_handler = proc_dointvec_jiffies, |
463 | .strategy = sysctl_jiffies | ||
464 | }, | 353 | }, |
465 | { | 354 | { |
466 | .ctl_name = NET_TCP_ORPHAN_RETRIES, | ||
467 | .procname = "tcp_orphan_retries", | 355 | .procname = "tcp_orphan_retries", |
468 | .data = &sysctl_tcp_orphan_retries, | 356 | .data = &sysctl_tcp_orphan_retries, |
469 | .maxlen = sizeof(int), | 357 | .maxlen = sizeof(int), |
@@ -471,7 +359,6 @@ static struct ctl_table ipv4_table[] = { | |||
471 | .proc_handler = proc_dointvec | 359 | .proc_handler = proc_dointvec |
472 | }, | 360 | }, |
473 | { | 361 | { |
474 | .ctl_name = NET_TCP_FACK, | ||
475 | .procname = "tcp_fack", | 362 | .procname = "tcp_fack", |
476 | .data = &sysctl_tcp_fack, | 363 | .data = &sysctl_tcp_fack, |
477 | .maxlen = sizeof(int), | 364 | .maxlen = sizeof(int), |
@@ -479,7 +366,6 @@ static struct ctl_table ipv4_table[] = { | |||
479 | .proc_handler = proc_dointvec | 366 | .proc_handler = proc_dointvec |
480 | }, | 367 | }, |
481 | { | 368 | { |
482 | .ctl_name = NET_TCP_REORDERING, | ||
483 | .procname = "tcp_reordering", | 369 | .procname = "tcp_reordering", |
484 | .data = &sysctl_tcp_reordering, | 370 | .data = &sysctl_tcp_reordering, |
485 | .maxlen = sizeof(int), | 371 | .maxlen = sizeof(int), |
@@ -487,7 +373,6 @@ static struct ctl_table ipv4_table[] = { | |||
487 | .proc_handler = proc_dointvec | 373 | .proc_handler = proc_dointvec |
488 | }, | 374 | }, |
489 | { | 375 | { |
490 | .ctl_name = NET_TCP_ECN, | ||
491 | .procname = "tcp_ecn", | 376 | .procname = "tcp_ecn", |
492 | .data = &sysctl_tcp_ecn, | 377 | .data = &sysctl_tcp_ecn, |
493 | .maxlen = sizeof(int), | 378 | .maxlen = sizeof(int), |
@@ -495,7 +380,6 @@ static struct ctl_table ipv4_table[] = { | |||
495 | .proc_handler = proc_dointvec | 380 | .proc_handler = proc_dointvec |
496 | }, | 381 | }, |
497 | { | 382 | { |
498 | .ctl_name = NET_TCP_DSACK, | ||
499 | .procname = "tcp_dsack", | 383 | .procname = "tcp_dsack", |
500 | .data = &sysctl_tcp_dsack, | 384 | .data = &sysctl_tcp_dsack, |
501 | .maxlen = sizeof(int), | 385 | .maxlen = sizeof(int), |
@@ -503,7 +387,6 @@ static struct ctl_table ipv4_table[] = { | |||
503 | .proc_handler = proc_dointvec | 387 | .proc_handler = proc_dointvec |
504 | }, | 388 | }, |
505 | { | 389 | { |
506 | .ctl_name = NET_TCP_MEM, | ||
507 | .procname = "tcp_mem", | 390 | .procname = "tcp_mem", |
508 | .data = &sysctl_tcp_mem, | 391 | .data = &sysctl_tcp_mem, |
509 | .maxlen = sizeof(sysctl_tcp_mem), | 392 | .maxlen = sizeof(sysctl_tcp_mem), |
@@ -511,7 +394,6 @@ static struct ctl_table ipv4_table[] = { | |||
511 | .proc_handler = proc_dointvec | 394 | .proc_handler = proc_dointvec |
512 | }, | 395 | }, |
513 | { | 396 | { |
514 | .ctl_name = NET_TCP_WMEM, | ||
515 | .procname = "tcp_wmem", | 397 | .procname = "tcp_wmem", |
516 | .data = &sysctl_tcp_wmem, | 398 | .data = &sysctl_tcp_wmem, |
517 | .maxlen = sizeof(sysctl_tcp_wmem), | 399 | .maxlen = sizeof(sysctl_tcp_wmem), |
@@ -519,7 +401,6 @@ static struct ctl_table ipv4_table[] = { | |||
519 | .proc_handler = proc_dointvec | 401 | .proc_handler = proc_dointvec |
520 | }, | 402 | }, |
521 | { | 403 | { |
522 | .ctl_name = NET_TCP_RMEM, | ||
523 | .procname = "tcp_rmem", | 404 | .procname = "tcp_rmem", |
524 | .data = &sysctl_tcp_rmem, | 405 | .data = &sysctl_tcp_rmem, |
525 | .maxlen = sizeof(sysctl_tcp_rmem), | 406 | .maxlen = sizeof(sysctl_tcp_rmem), |
@@ -527,7 +408,6 @@ static struct ctl_table ipv4_table[] = { | |||
527 | .proc_handler = proc_dointvec | 408 | .proc_handler = proc_dointvec |
528 | }, | 409 | }, |
529 | { | 410 | { |
530 | .ctl_name = NET_TCP_APP_WIN, | ||
531 | .procname = "tcp_app_win", | 411 | .procname = "tcp_app_win", |
532 | .data = &sysctl_tcp_app_win, | 412 | .data = &sysctl_tcp_app_win, |
533 | .maxlen = sizeof(int), | 413 | .maxlen = sizeof(int), |
@@ -535,7 +415,6 @@ static struct ctl_table ipv4_table[] = { | |||
535 | .proc_handler = proc_dointvec | 415 | .proc_handler = proc_dointvec |
536 | }, | 416 | }, |
537 | { | 417 | { |
538 | .ctl_name = NET_TCP_ADV_WIN_SCALE, | ||
539 | .procname = "tcp_adv_win_scale", | 418 | .procname = "tcp_adv_win_scale", |
540 | .data = &sysctl_tcp_adv_win_scale, | 419 | .data = &sysctl_tcp_adv_win_scale, |
541 | .maxlen = sizeof(int), | 420 | .maxlen = sizeof(int), |
@@ -543,7 +422,6 @@ static struct ctl_table ipv4_table[] = { | |||
543 | .proc_handler = proc_dointvec | 422 | .proc_handler = proc_dointvec |
544 | }, | 423 | }, |
545 | { | 424 | { |
546 | .ctl_name = NET_TCP_TW_REUSE, | ||
547 | .procname = "tcp_tw_reuse", | 425 | .procname = "tcp_tw_reuse", |
548 | .data = &sysctl_tcp_tw_reuse, | 426 | .data = &sysctl_tcp_tw_reuse, |
549 | .maxlen = sizeof(int), | 427 | .maxlen = sizeof(int), |
@@ -551,7 +429,6 @@ static struct ctl_table ipv4_table[] = { | |||
551 | .proc_handler = proc_dointvec | 429 | .proc_handler = proc_dointvec |
552 | }, | 430 | }, |
553 | { | 431 | { |
554 | .ctl_name = NET_TCP_FRTO, | ||
555 | .procname = "tcp_frto", | 432 | .procname = "tcp_frto", |
556 | .data = &sysctl_tcp_frto, | 433 | .data = &sysctl_tcp_frto, |
557 | .maxlen = sizeof(int), | 434 | .maxlen = sizeof(int), |
@@ -559,7 +436,6 @@ static struct ctl_table ipv4_table[] = { | |||
559 | .proc_handler = proc_dointvec | 436 | .proc_handler = proc_dointvec |
560 | }, | 437 | }, |
561 | { | 438 | { |
562 | .ctl_name = NET_TCP_FRTO_RESPONSE, | ||
563 | .procname = "tcp_frto_response", | 439 | .procname = "tcp_frto_response", |
564 | .data = &sysctl_tcp_frto_response, | 440 | .data = &sysctl_tcp_frto_response, |
565 | .maxlen = sizeof(int), | 441 | .maxlen = sizeof(int), |
@@ -567,7 +443,6 @@ static struct ctl_table ipv4_table[] = { | |||
567 | .proc_handler = proc_dointvec | 443 | .proc_handler = proc_dointvec |
568 | }, | 444 | }, |
569 | { | 445 | { |
570 | .ctl_name = NET_TCP_LOW_LATENCY, | ||
571 | .procname = "tcp_low_latency", | 446 | .procname = "tcp_low_latency", |
572 | .data = &sysctl_tcp_low_latency, | 447 | .data = &sysctl_tcp_low_latency, |
573 | .maxlen = sizeof(int), | 448 | .maxlen = sizeof(int), |
@@ -575,7 +450,6 @@ static struct ctl_table ipv4_table[] = { | |||
575 | .proc_handler = proc_dointvec | 450 | .proc_handler = proc_dointvec |
576 | }, | 451 | }, |
577 | { | 452 | { |
578 | .ctl_name = NET_TCP_NO_METRICS_SAVE, | ||
579 | .procname = "tcp_no_metrics_save", | 453 | .procname = "tcp_no_metrics_save", |
580 | .data = &sysctl_tcp_nometrics_save, | 454 | .data = &sysctl_tcp_nometrics_save, |
581 | .maxlen = sizeof(int), | 455 | .maxlen = sizeof(int), |
@@ -583,7 +457,6 @@ static struct ctl_table ipv4_table[] = { | |||
583 | .proc_handler = proc_dointvec, | 457 | .proc_handler = proc_dointvec, |
584 | }, | 458 | }, |
585 | { | 459 | { |
586 | .ctl_name = NET_TCP_MODERATE_RCVBUF, | ||
587 | .procname = "tcp_moderate_rcvbuf", | 460 | .procname = "tcp_moderate_rcvbuf", |
588 | .data = &sysctl_tcp_moderate_rcvbuf, | 461 | .data = &sysctl_tcp_moderate_rcvbuf, |
589 | .maxlen = sizeof(int), | 462 | .maxlen = sizeof(int), |
@@ -591,7 +464,6 @@ static struct ctl_table ipv4_table[] = { | |||
591 | .proc_handler = proc_dointvec, | 464 | .proc_handler = proc_dointvec, |
592 | }, | 465 | }, |
593 | { | 466 | { |
594 | .ctl_name = NET_TCP_TSO_WIN_DIVISOR, | ||
595 | .procname = "tcp_tso_win_divisor", | 467 | .procname = "tcp_tso_win_divisor", |
596 | .data = &sysctl_tcp_tso_win_divisor, | 468 | .data = &sysctl_tcp_tso_win_divisor, |
597 | .maxlen = sizeof(int), | 469 | .maxlen = sizeof(int), |
@@ -599,15 +471,12 @@ static struct ctl_table ipv4_table[] = { | |||
599 | .proc_handler = proc_dointvec, | 471 | .proc_handler = proc_dointvec, |
600 | }, | 472 | }, |
601 | { | 473 | { |
602 | .ctl_name = NET_TCP_CONG_CONTROL, | ||
603 | .procname = "tcp_congestion_control", | 474 | .procname = "tcp_congestion_control", |
604 | .mode = 0644, | 475 | .mode = 0644, |
605 | .maxlen = TCP_CA_NAME_MAX, | 476 | .maxlen = TCP_CA_NAME_MAX, |
606 | .proc_handler = proc_tcp_congestion_control, | 477 | .proc_handler = proc_tcp_congestion_control, |
607 | .strategy = sysctl_tcp_congestion_control, | ||
608 | }, | 478 | }, |
609 | { | 479 | { |
610 | .ctl_name = NET_TCP_ABC, | ||
611 | .procname = "tcp_abc", | 480 | .procname = "tcp_abc", |
612 | .data = &sysctl_tcp_abc, | 481 | .data = &sysctl_tcp_abc, |
613 | .maxlen = sizeof(int), | 482 | .maxlen = sizeof(int), |
@@ -615,7 +484,6 @@ static struct ctl_table ipv4_table[] = { | |||
615 | .proc_handler = proc_dointvec, | 484 | .proc_handler = proc_dointvec, |
616 | }, | 485 | }, |
617 | { | 486 | { |
618 | .ctl_name = NET_TCP_MTU_PROBING, | ||
619 | .procname = "tcp_mtu_probing", | 487 | .procname = "tcp_mtu_probing", |
620 | .data = &sysctl_tcp_mtu_probing, | 488 | .data = &sysctl_tcp_mtu_probing, |
621 | .maxlen = sizeof(int), | 489 | .maxlen = sizeof(int), |
@@ -623,7 +491,6 @@ static struct ctl_table ipv4_table[] = { | |||
623 | .proc_handler = proc_dointvec, | 491 | .proc_handler = proc_dointvec, |
624 | }, | 492 | }, |
625 | { | 493 | { |
626 | .ctl_name = NET_TCP_BASE_MSS, | ||
627 | .procname = "tcp_base_mss", | 494 | .procname = "tcp_base_mss", |
628 | .data = &sysctl_tcp_base_mss, | 495 | .data = &sysctl_tcp_base_mss, |
629 | .maxlen = sizeof(int), | 496 | .maxlen = sizeof(int), |
@@ -631,7 +498,6 @@ static struct ctl_table ipv4_table[] = { | |||
631 | .proc_handler = proc_dointvec, | 498 | .proc_handler = proc_dointvec, |
632 | }, | 499 | }, |
633 | { | 500 | { |
634 | .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, | ||
635 | .procname = "tcp_workaround_signed_windows", | 501 | .procname = "tcp_workaround_signed_windows", |
636 | .data = &sysctl_tcp_workaround_signed_windows, | 502 | .data = &sysctl_tcp_workaround_signed_windows, |
637 | .maxlen = sizeof(int), | 503 | .maxlen = sizeof(int), |
@@ -640,7 +506,6 @@ static struct ctl_table ipv4_table[] = { | |||
640 | }, | 506 | }, |
641 | #ifdef CONFIG_NET_DMA | 507 | #ifdef CONFIG_NET_DMA |
642 | { | 508 | { |
643 | .ctl_name = NET_TCP_DMA_COPYBREAK, | ||
644 | .procname = "tcp_dma_copybreak", | 509 | .procname = "tcp_dma_copybreak", |
645 | .data = &sysctl_tcp_dma_copybreak, | 510 | .data = &sysctl_tcp_dma_copybreak, |
646 | .maxlen = sizeof(int), | 511 | .maxlen = sizeof(int), |
@@ -649,7 +514,6 @@ static struct ctl_table ipv4_table[] = { | |||
649 | }, | 514 | }, |
650 | #endif | 515 | #endif |
651 | { | 516 | { |
652 | .ctl_name = NET_TCP_SLOW_START_AFTER_IDLE, | ||
653 | .procname = "tcp_slow_start_after_idle", | 517 | .procname = "tcp_slow_start_after_idle", |
654 | .data = &sysctl_tcp_slow_start_after_idle, | 518 | .data = &sysctl_tcp_slow_start_after_idle, |
655 | .maxlen = sizeof(int), | 519 | .maxlen = sizeof(int), |
@@ -658,7 +522,6 @@ static struct ctl_table ipv4_table[] = { | |||
658 | }, | 522 | }, |
659 | #ifdef CONFIG_NETLABEL | 523 | #ifdef CONFIG_NETLABEL |
660 | { | 524 | { |
661 | .ctl_name = NET_CIPSOV4_CACHE_ENABLE, | ||
662 | .procname = "cipso_cache_enable", | 525 | .procname = "cipso_cache_enable", |
663 | .data = &cipso_v4_cache_enabled, | 526 | .data = &cipso_v4_cache_enabled, |
664 | .maxlen = sizeof(int), | 527 | .maxlen = sizeof(int), |
@@ -666,7 +529,6 @@ static struct ctl_table ipv4_table[] = { | |||
666 | .proc_handler = proc_dointvec, | 529 | .proc_handler = proc_dointvec, |
667 | }, | 530 | }, |
668 | { | 531 | { |
669 | .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, | ||
670 | .procname = "cipso_cache_bucket_size", | 532 | .procname = "cipso_cache_bucket_size", |
671 | .data = &cipso_v4_cache_bucketsize, | 533 | .data = &cipso_v4_cache_bucketsize, |
672 | .maxlen = sizeof(int), | 534 | .maxlen = sizeof(int), |
@@ -674,7 +536,6 @@ static struct ctl_table ipv4_table[] = { | |||
674 | .proc_handler = proc_dointvec, | 536 | .proc_handler = proc_dointvec, |
675 | }, | 537 | }, |
676 | { | 538 | { |
677 | .ctl_name = NET_CIPSOV4_RBM_OPTFMT, | ||
678 | .procname = "cipso_rbm_optfmt", | 539 | .procname = "cipso_rbm_optfmt", |
679 | .data = &cipso_v4_rbm_optfmt, | 540 | .data = &cipso_v4_rbm_optfmt, |
680 | .maxlen = sizeof(int), | 541 | .maxlen = sizeof(int), |
@@ -682,7 +543,6 @@ static struct ctl_table ipv4_table[] = { | |||
682 | .proc_handler = proc_dointvec, | 543 | .proc_handler = proc_dointvec, |
683 | }, | 544 | }, |
684 | { | 545 | { |
685 | .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, | ||
686 | .procname = "cipso_rbm_strictvalid", | 546 | .procname = "cipso_rbm_strictvalid", |
687 | .data = &cipso_v4_rbm_strictvalid, | 547 | .data = &cipso_v4_rbm_strictvalid, |
688 | .maxlen = sizeof(int), | 548 | .maxlen = sizeof(int), |
@@ -697,15 +557,12 @@ static struct ctl_table ipv4_table[] = { | |||
697 | .proc_handler = proc_tcp_available_congestion_control, | 557 | .proc_handler = proc_tcp_available_congestion_control, |
698 | }, | 558 | }, |
699 | { | 559 | { |
700 | .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, | ||
701 | .procname = "tcp_allowed_congestion_control", | 560 | .procname = "tcp_allowed_congestion_control", |
702 | .maxlen = TCP_CA_BUF_MAX, | 561 | .maxlen = TCP_CA_BUF_MAX, |
703 | .mode = 0644, | 562 | .mode = 0644, |
704 | .proc_handler = proc_allowed_congestion_control, | 563 | .proc_handler = proc_allowed_congestion_control, |
705 | .strategy = strategy_allowed_congestion_control, | ||
706 | }, | 564 | }, |
707 | { | 565 | { |
708 | .ctl_name = NET_TCP_MAX_SSTHRESH, | ||
709 | .procname = "tcp_max_ssthresh", | 566 | .procname = "tcp_max_ssthresh", |
710 | .data = &sysctl_tcp_max_ssthresh, | 567 | .data = &sysctl_tcp_max_ssthresh, |
711 | .maxlen = sizeof(int), | 568 | .maxlen = sizeof(int), |
@@ -713,41 +570,55 @@ static struct ctl_table ipv4_table[] = { | |||
713 | .proc_handler = proc_dointvec, | 570 | .proc_handler = proc_dointvec, |
714 | }, | 571 | }, |
715 | { | 572 | { |
716 | .ctl_name = CTL_UNNUMBERED, | 573 | .procname = "tcp_cookie_size", |
574 | .data = &sysctl_tcp_cookie_size, | ||
575 | .maxlen = sizeof(int), | ||
576 | .mode = 0644, | ||
577 | .proc_handler = proc_dointvec | ||
578 | }, | ||
579 | { | ||
580 | .procname = "tcp_thin_linear_timeouts", | ||
581 | .data = &sysctl_tcp_thin_linear_timeouts, | ||
582 | .maxlen = sizeof(int), | ||
583 | .mode = 0644, | ||
584 | .proc_handler = proc_dointvec | ||
585 | }, | ||
586 | { | ||
587 | .procname = "tcp_thin_dupack", | ||
588 | .data = &sysctl_tcp_thin_dupack, | ||
589 | .maxlen = sizeof(int), | ||
590 | .mode = 0644, | ||
591 | .proc_handler = proc_dointvec | ||
592 | }, | ||
593 | { | ||
717 | .procname = "udp_mem", | 594 | .procname = "udp_mem", |
718 | .data = &sysctl_udp_mem, | 595 | .data = &sysctl_udp_mem, |
719 | .maxlen = sizeof(sysctl_udp_mem), | 596 | .maxlen = sizeof(sysctl_udp_mem), |
720 | .mode = 0644, | 597 | .mode = 0644, |
721 | .proc_handler = proc_dointvec_minmax, | 598 | .proc_handler = proc_dointvec_minmax, |
722 | .strategy = sysctl_intvec, | ||
723 | .extra1 = &zero | 599 | .extra1 = &zero |
724 | }, | 600 | }, |
725 | { | 601 | { |
726 | .ctl_name = CTL_UNNUMBERED, | ||
727 | .procname = "udp_rmem_min", | 602 | .procname = "udp_rmem_min", |
728 | .data = &sysctl_udp_rmem_min, | 603 | .data = &sysctl_udp_rmem_min, |
729 | .maxlen = sizeof(sysctl_udp_rmem_min), | 604 | .maxlen = sizeof(sysctl_udp_rmem_min), |
730 | .mode = 0644, | 605 | .mode = 0644, |
731 | .proc_handler = proc_dointvec_minmax, | 606 | .proc_handler = proc_dointvec_minmax, |
732 | .strategy = sysctl_intvec, | ||
733 | .extra1 = &zero | 607 | .extra1 = &zero |
734 | }, | 608 | }, |
735 | { | 609 | { |
736 | .ctl_name = CTL_UNNUMBERED, | ||
737 | .procname = "udp_wmem_min", | 610 | .procname = "udp_wmem_min", |
738 | .data = &sysctl_udp_wmem_min, | 611 | .data = &sysctl_udp_wmem_min, |
739 | .maxlen = sizeof(sysctl_udp_wmem_min), | 612 | .maxlen = sizeof(sysctl_udp_wmem_min), |
740 | .mode = 0644, | 613 | .mode = 0644, |
741 | .proc_handler = proc_dointvec_minmax, | 614 | .proc_handler = proc_dointvec_minmax, |
742 | .strategy = sysctl_intvec, | ||
743 | .extra1 = &zero | 615 | .extra1 = &zero |
744 | }, | 616 | }, |
745 | { .ctl_name = 0 } | 617 | { } |
746 | }; | 618 | }; |
747 | 619 | ||
748 | static struct ctl_table ipv4_net_table[] = { | 620 | static struct ctl_table ipv4_net_table[] = { |
749 | { | 621 | { |
750 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, | ||
751 | .procname = "icmp_echo_ignore_all", | 622 | .procname = "icmp_echo_ignore_all", |
752 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, | 623 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, |
753 | .maxlen = sizeof(int), | 624 | .maxlen = sizeof(int), |
@@ -755,7 +626,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
755 | .proc_handler = proc_dointvec | 626 | .proc_handler = proc_dointvec |
756 | }, | 627 | }, |
757 | { | 628 | { |
758 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, | ||
759 | .procname = "icmp_echo_ignore_broadcasts", | 629 | .procname = "icmp_echo_ignore_broadcasts", |
760 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, | 630 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, |
761 | .maxlen = sizeof(int), | 631 | .maxlen = sizeof(int), |
@@ -763,7 +633,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
763 | .proc_handler = proc_dointvec | 633 | .proc_handler = proc_dointvec |
764 | }, | 634 | }, |
765 | { | 635 | { |
766 | .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, | ||
767 | .procname = "icmp_ignore_bogus_error_responses", | 636 | .procname = "icmp_ignore_bogus_error_responses", |
768 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | 637 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, |
769 | .maxlen = sizeof(int), | 638 | .maxlen = sizeof(int), |
@@ -771,7 +640,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
771 | .proc_handler = proc_dointvec | 640 | .proc_handler = proc_dointvec |
772 | }, | 641 | }, |
773 | { | 642 | { |
774 | .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, | ||
775 | .procname = "icmp_errors_use_inbound_ifaddr", | 643 | .procname = "icmp_errors_use_inbound_ifaddr", |
776 | .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, | 644 | .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, |
777 | .maxlen = sizeof(int), | 645 | .maxlen = sizeof(int), |
@@ -779,16 +647,13 @@ static struct ctl_table ipv4_net_table[] = { | |||
779 | .proc_handler = proc_dointvec | 647 | .proc_handler = proc_dointvec |
780 | }, | 648 | }, |
781 | { | 649 | { |
782 | .ctl_name = NET_IPV4_ICMP_RATELIMIT, | ||
783 | .procname = "icmp_ratelimit", | 650 | .procname = "icmp_ratelimit", |
784 | .data = &init_net.ipv4.sysctl_icmp_ratelimit, | 651 | .data = &init_net.ipv4.sysctl_icmp_ratelimit, |
785 | .maxlen = sizeof(int), | 652 | .maxlen = sizeof(int), |
786 | .mode = 0644, | 653 | .mode = 0644, |
787 | .proc_handler = proc_dointvec_ms_jiffies, | 654 | .proc_handler = proc_dointvec_ms_jiffies, |
788 | .strategy = sysctl_ms_jiffies | ||
789 | }, | 655 | }, |
790 | { | 656 | { |
791 | .ctl_name = NET_IPV4_ICMP_RATEMASK, | ||
792 | .procname = "icmp_ratemask", | 657 | .procname = "icmp_ratemask", |
793 | .data = &init_net.ipv4.sysctl_icmp_ratemask, | 658 | .data = &init_net.ipv4.sysctl_icmp_ratemask, |
794 | .maxlen = sizeof(int), | 659 | .maxlen = sizeof(int), |
@@ -796,7 +661,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
796 | .proc_handler = proc_dointvec | 661 | .proc_handler = proc_dointvec |
797 | }, | 662 | }, |
798 | { | 663 | { |
799 | .ctl_name = CTL_UNNUMBERED, | ||
800 | .procname = "rt_cache_rebuild_count", | 664 | .procname = "rt_cache_rebuild_count", |
801 | .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, | 665 | .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, |
802 | .maxlen = sizeof(int), | 666 | .maxlen = sizeof(int), |
@@ -807,8 +671,8 @@ static struct ctl_table ipv4_net_table[] = { | |||
807 | }; | 671 | }; |
808 | 672 | ||
809 | struct ctl_path net_ipv4_ctl_path[] = { | 673 | struct ctl_path net_ipv4_ctl_path[] = { |
810 | { .procname = "net", .ctl_name = CTL_NET, }, | 674 | { .procname = "net", }, |
811 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 675 | { .procname = "ipv4", }, |
812 | { }, | 676 | { }, |
813 | }; | 677 | }; |
814 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); | 678 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); |
@@ -818,7 +682,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
818 | struct ctl_table *table; | 682 | struct ctl_table *table; |
819 | 683 | ||
820 | table = ipv4_net_table; | 684 | table = ipv4_net_table; |
821 | if (net != &init_net) { | 685 | if (!net_eq(net, &init_net)) { |
822 | table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); | 686 | table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); |
823 | if (table == NULL) | 687 | if (table == NULL) |
824 | goto err_alloc; | 688 | goto err_alloc; |
@@ -849,7 +713,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
849 | return 0; | 713 | return 0; |
850 | 714 | ||
851 | err_reg: | 715 | err_reg: |
852 | if (net != &init_net) | 716 | if (!net_eq(net, &init_net)) |
853 | kfree(table); | 717 | kfree(table); |
854 | err_alloc: | 718 | err_alloc: |
855 | return -ENOMEM; | 719 | return -ENOMEM; |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1813bc71088..296150b2a62f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -264,6 +264,8 @@ | |||
264 | #include <linux/cache.h> | 264 | #include <linux/cache.h> |
265 | #include <linux/err.h> | 265 | #include <linux/err.h> |
266 | #include <linux/crypto.h> | 266 | #include <linux/crypto.h> |
267 | #include <linux/time.h> | ||
268 | #include <linux/slab.h> | ||
267 | 269 | ||
268 | #include <net/icmp.h> | 270 | #include <net/icmp.h> |
269 | #include <net/tcp.h> | 271 | #include <net/tcp.h> |
@@ -428,7 +430,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
428 | if (tp->urg_seq == tp->copied_seq && | 430 | if (tp->urg_seq == tp->copied_seq && |
429 | !sock_flag(sk, SOCK_URGINLINE) && | 431 | !sock_flag(sk, SOCK_URGINLINE) && |
430 | tp->urg_data) | 432 | tp->urg_data) |
431 | target--; | 433 | target++; |
432 | 434 | ||
433 | /* Potential race condition. If read of tp below will | 435 | /* Potential race condition. If read of tp below will |
434 | * escape above sk->sk_state, we can be illegally awaken | 436 | * escape above sk->sk_state, we can be illegally awaken |
@@ -535,8 +537,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
535 | tp->nonagle &= ~TCP_NAGLE_PUSH; | 537 | tp->nonagle &= ~TCP_NAGLE_PUSH; |
536 | } | 538 | } |
537 | 539 | ||
538 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | 540 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) |
539 | struct sk_buff *skb) | ||
540 | { | 541 | { |
541 | if (flags & MSG_OOB) | 542 | if (flags & MSG_OOB) |
542 | tp->snd_up = tp->write_seq; | 543 | tp->snd_up = tp->write_seq; |
@@ -545,13 +546,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | |||
545 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, | 546 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, |
546 | int nonagle) | 547 | int nonagle) |
547 | { | 548 | { |
548 | struct tcp_sock *tp = tcp_sk(sk); | ||
549 | |||
550 | if (tcp_send_head(sk)) { | 549 | if (tcp_send_head(sk)) { |
551 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 550 | struct tcp_sock *tp = tcp_sk(sk); |
551 | |||
552 | if (!(flags & MSG_MORE) || forced_push(tp)) | 552 | if (!(flags & MSG_MORE) || forced_push(tp)) |
553 | tcp_mark_push(tp, skb); | 553 | tcp_mark_push(tp, tcp_write_queue_tail(sk)); |
554 | tcp_mark_urg(tp, flags, skb); | 554 | |
555 | tcp_mark_urg(tp, flags); | ||
555 | __tcp_push_pending_frames(sk, mss_now, | 556 | __tcp_push_pending_frames(sk, mss_now, |
556 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); | 557 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); |
557 | } | 558 | } |
@@ -876,12 +877,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | |||
876 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 877 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) |
877 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | 878 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) |
878 | 879 | ||
879 | static inline int select_size(struct sock *sk) | 880 | static inline int select_size(struct sock *sk, int sg) |
880 | { | 881 | { |
881 | struct tcp_sock *tp = tcp_sk(sk); | 882 | struct tcp_sock *tp = tcp_sk(sk); |
882 | int tmp = tp->mss_cache; | 883 | int tmp = tp->mss_cache; |
883 | 884 | ||
884 | if (sk->sk_route_caps & NETIF_F_SG) { | 885 | if (sg) { |
885 | if (sk_can_gso(sk)) | 886 | if (sk_can_gso(sk)) |
886 | tmp = 0; | 887 | tmp = 0; |
887 | else { | 888 | else { |
@@ -905,7 +906,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
905 | struct sk_buff *skb; | 906 | struct sk_buff *skb; |
906 | int iovlen, flags; | 907 | int iovlen, flags; |
907 | int mss_now, size_goal; | 908 | int mss_now, size_goal; |
908 | int err, copied; | 909 | int sg, err, copied; |
909 | long timeo; | 910 | long timeo; |
910 | 911 | ||
911 | lock_sock(sk); | 912 | lock_sock(sk); |
@@ -933,6 +934,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
933 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 934 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
934 | goto out_err; | 935 | goto out_err; |
935 | 936 | ||
937 | sg = sk->sk_route_caps & NETIF_F_SG; | ||
938 | |||
936 | while (--iovlen >= 0) { | 939 | while (--iovlen >= 0) { |
937 | int seglen = iov->iov_len; | 940 | int seglen = iov->iov_len; |
938 | unsigned char __user *from = iov->iov_base; | 941 | unsigned char __user *from = iov->iov_base; |
@@ -958,8 +961,9 @@ new_segment: | |||
958 | if (!sk_stream_memory_free(sk)) | 961 | if (!sk_stream_memory_free(sk)) |
959 | goto wait_for_sndbuf; | 962 | goto wait_for_sndbuf; |
960 | 963 | ||
961 | skb = sk_stream_alloc_skb(sk, select_size(sk), | 964 | skb = sk_stream_alloc_skb(sk, |
962 | sk->sk_allocation); | 965 | select_size(sk, sg), |
966 | sk->sk_allocation); | ||
963 | if (!skb) | 967 | if (!skb) |
964 | goto wait_for_memory; | 968 | goto wait_for_memory; |
965 | 969 | ||
@@ -996,9 +1000,7 @@ new_segment: | |||
996 | /* We can extend the last page | 1000 | /* We can extend the last page |
997 | * fragment. */ | 1001 | * fragment. */ |
998 | merge = 1; | 1002 | merge = 1; |
999 | } else if (i == MAX_SKB_FRAGS || | 1003 | } else if (i == MAX_SKB_FRAGS || !sg) { |
1000 | (!i && | ||
1001 | !(sk->sk_route_caps & NETIF_F_SG))) { | ||
1002 | /* Need to add new fragment and cannot | 1004 | /* Need to add new fragment and cannot |
1003 | * do this because interface is non-SG, | 1005 | * do this because interface is non-SG, |
1004 | * or because all the page slots are | 1006 | * or because all the page slots are |
@@ -1253,6 +1255,39 @@ static void tcp_prequeue_process(struct sock *sk) | |||
1253 | tp->ucopy.memory = 0; | 1255 | tp->ucopy.memory = 0; |
1254 | } | 1256 | } |
1255 | 1257 | ||
1258 | #ifdef CONFIG_NET_DMA | ||
1259 | static void tcp_service_net_dma(struct sock *sk, bool wait) /* issue queued NET_DMA copies for sk and free skbs whose copies have finished; wait=true repeats until the last issued copy completes */ | |
1260 | { | |
1261 | dma_cookie_t done, used; | |
1262 | dma_cookie_t last_issued; | |
1263 | struct tcp_sock *tp = tcp_sk(sk); | |
1264 | |
1265 | if (!tp->ucopy.dma_chan) /* no DMA channel attached: nothing to service */ | |
1266 | return; | |
1267 | |
1268 | last_issued = tp->ucopy.dma_cookie; /* cookie whose completion ends the loop below */ | |
1269 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | |
1270 | |
1271 | do { | |
1272 | if (dma_async_memcpy_complete(tp->ucopy.dma_chan, | |
1273 | last_issued, &done, | |
1274 | &used) == DMA_SUCCESS) { | |
1275 | /* Safe to free early-copied skbs now */ | |
1276 | __skb_queue_purge(&sk->sk_async_wait_queue); | |
1277 | break; | |
1278 | } else { /* not finished yet: free only skbs whose own cookie is already complete */ | |
1279 | struct sk_buff *skb; | |
1280 | while ((skb = skb_peek(&sk->sk_async_wait_queue)) && | |
1281 | (dma_async_is_complete(skb->dma_cookie, done, | |
1282 | used) == DMA_SUCCESS)) { | |
1283 | __skb_dequeue(&sk->sk_async_wait_queue); | |
1284 | kfree_skb(skb); | |
1285 | } | |
1286 | } | |
1287 | } while (wait); /* wait == false: a single pass, no spinning */ | |
1288 | } | |
1289 | #endif | ||
1290 | |||
1256 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | 1291 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) |
1257 | { | 1292 | { |
1258 | struct sk_buff *skb; | 1293 | struct sk_buff *skb; |
@@ -1334,6 +1369,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1334 | sk_eat_skb(sk, skb, 0); | 1369 | sk_eat_skb(sk, skb, 0); |
1335 | if (!desc->count) | 1370 | if (!desc->count) |
1336 | break; | 1371 | break; |
1372 | tp->copied_seq = seq; | ||
1337 | } | 1373 | } |
1338 | tp->copied_seq = seq; | 1374 | tp->copied_seq = seq; |
1339 | 1375 | ||
@@ -1545,6 +1581,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1545 | /* __ Set realtime policy in scheduler __ */ | 1581 | /* __ Set realtime policy in scheduler __ */ |
1546 | } | 1582 | } |
1547 | 1583 | ||
1584 | #ifdef CONFIG_NET_DMA | ||
1585 | if (tp->ucopy.dma_chan) | ||
1586 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
1587 | #endif | ||
1548 | if (copied >= target) { | 1588 | if (copied >= target) { |
1549 | /* Do not sleep, just process backlog. */ | 1589 | /* Do not sleep, just process backlog. */ |
1550 | release_sock(sk); | 1590 | release_sock(sk); |
@@ -1553,6 +1593,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1553 | sk_wait_data(sk, &timeo); | 1593 | sk_wait_data(sk, &timeo); |
1554 | 1594 | ||
1555 | #ifdef CONFIG_NET_DMA | 1595 | #ifdef CONFIG_NET_DMA |
1596 | tcp_service_net_dma(sk, false); /* Don't block */ | ||
1556 | tp->ucopy.wakeup = 0; | 1597 | tp->ucopy.wakeup = 0; |
1557 | #endif | 1598 | #endif |
1558 | 1599 | ||
@@ -1632,6 +1673,9 @@ do_prequeue: | |||
1632 | copied = -EFAULT; | 1673 | copied = -EFAULT; |
1633 | break; | 1674 | break; |
1634 | } | 1675 | } |
1676 | |||
1677 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
1678 | |||
1635 | if ((offset + used) == skb->len) | 1679 | if ((offset + used) == skb->len) |
1636 | copied_early = 1; | 1680 | copied_early = 1; |
1637 | 1681 | ||
@@ -1701,27 +1745,9 @@ skip_copy: | |||
1701 | } | 1745 | } |
1702 | 1746 | ||
1703 | #ifdef CONFIG_NET_DMA | 1747 | #ifdef CONFIG_NET_DMA |
1704 | if (tp->ucopy.dma_chan) { | 1748 | tcp_service_net_dma(sk, true); /* Wait for queue to drain */ |
1705 | dma_cookie_t done, used; | 1749 | tp->ucopy.dma_chan = NULL; |
1706 | |||
1707 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
1708 | |||
1709 | while (dma_async_memcpy_complete(tp->ucopy.dma_chan, | ||
1710 | tp->ucopy.dma_cookie, &done, | ||
1711 | &used) == DMA_IN_PROGRESS) { | ||
1712 | /* do partial cleanup of sk_async_wait_queue */ | ||
1713 | while ((skb = skb_peek(&sk->sk_async_wait_queue)) && | ||
1714 | (dma_async_is_complete(skb->dma_cookie, done, | ||
1715 | used) == DMA_SUCCESS)) { | ||
1716 | __skb_dequeue(&sk->sk_async_wait_queue); | ||
1717 | kfree_skb(skb); | ||
1718 | } | ||
1719 | } | ||
1720 | 1750 | ||
1721 | /* Safe to free early-copied skbs now */ | ||
1722 | __skb_queue_purge(&sk->sk_async_wait_queue); | ||
1723 | tp->ucopy.dma_chan = NULL; | ||
1724 | } | ||
1725 | if (tp->ucopy.pinned_list) { | 1751 | if (tp->ucopy.pinned_list) { |
1726 | dma_unpin_iovec_pages(tp->ucopy.pinned_list); | 1752 | dma_unpin_iovec_pages(tp->ucopy.pinned_list); |
1727 | tp->ucopy.pinned_list = NULL; | 1753 | tp->ucopy.pinned_list = NULL; |
@@ -2042,7 +2068,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2042 | __skb_queue_purge(&sk->sk_async_wait_queue); | 2068 | __skb_queue_purge(&sk->sk_async_wait_queue); |
2043 | #endif | 2069 | #endif |
2044 | 2070 | ||
2045 | inet->dport = 0; | 2071 | inet->inet_dport = 0; |
2046 | 2072 | ||
2047 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | 2073 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
2048 | inet_reset_saddr(sk); | 2074 | inet_reset_saddr(sk); |
@@ -2059,6 +2085,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2059 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | 2085 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
2060 | tp->snd_cwnd_cnt = 0; | 2086 | tp->snd_cwnd_cnt = 0; |
2061 | tp->bytes_acked = 0; | 2087 | tp->bytes_acked = 0; |
2088 | tp->window_clamp = 0; | ||
2062 | tcp_set_ca_state(sk, TCP_CA_Open); | 2089 | tcp_set_ca_state(sk, TCP_CA_Open); |
2063 | tcp_clear_retrans(tp); | 2090 | tcp_clear_retrans(tp); |
2064 | inet_csk_delack_init(sk); | 2091 | inet_csk_delack_init(sk); |
@@ -2066,7 +2093,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2066 | memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); | 2093 | memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); |
2067 | __sk_dst_reset(sk); | 2094 | __sk_dst_reset(sk); |
2068 | 2095 | ||
2069 | WARN_ON(inet->num && !icsk->icsk_bind_hash); | 2096 | WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); |
2070 | 2097 | ||
2071 | sk->sk_error_report(sk); | 2098 | sk->sk_error_report(sk); |
2072 | return err; | 2099 | return err; |
@@ -2083,8 +2110,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2083 | int val; | 2110 | int val; |
2084 | int err = 0; | 2111 | int err = 0; |
2085 | 2112 | ||
2086 | /* This is a string value all the others are int's */ | 2113 | /* These are data/string values, all the others are ints */ |
2087 | if (optname == TCP_CONGESTION) { | 2114 | switch (optname) { |
2115 | case TCP_CONGESTION: { | ||
2088 | char name[TCP_CA_NAME_MAX]; | 2116 | char name[TCP_CA_NAME_MAX]; |
2089 | 2117 | ||
2090 | if (optlen < 1) | 2118 | if (optlen < 1) |
@@ -2101,6 +2129,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2101 | release_sock(sk); | 2129 | release_sock(sk); |
2102 | return err; | 2130 | return err; |
2103 | } | 2131 | } |
2132 | case TCP_COOKIE_TRANSACTIONS: { /* optval is a struct tcp_cookie_transactions; handled here, before the int-valued options */ | |
2133 | struct tcp_cookie_transactions ctd; | |
2134 | struct tcp_cookie_values *cvp = NULL; | |
2135 | |
2136 | if (sizeof(ctd) > optlen) | |
2137 | return -EINVAL; | |
2138 | if (copy_from_user(&ctd, optval, sizeof(ctd))) | |
2139 | return -EFAULT; | |
2140 | |
2141 | if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || | |
2142 | ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) | |
2143 | return -EINVAL; | |
2144 | |
2145 | if (ctd.tcpct_cookie_desired == 0) { | |
2146 | /* default to global value */ | |
2147 | } else if ((0x1 & ctd.tcpct_cookie_desired) || | |
2148 | ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || | |
2149 | ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { | |
2150 | return -EINVAL; | |
2151 | } | |
2152 | |
2153 | if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { | |
2154 | /* Supersedes all other values */ | |
2155 | lock_sock(sk); | |
2156 | if (tp->cookie_values != NULL) { | |
2157 | kref_put(&tp->cookie_values->kref, | |
2158 | tcp_cookie_values_release); | |
2159 | tp->cookie_values = NULL; | |
2160 | } | |
2161 | tp->rx_opt.cookie_in_always = 0; /* false */ | |
2162 | tp->rx_opt.cookie_out_never = 1; /* true */ | |
2163 | release_sock(sk); | |
2164 | return err; | |
2165 | } | |
2166 | |
2167 | /* Allocate ancillary memory before locking; a new object | |
2168 | * starts out holding exactly one reference. */ | |
2169 | if (ctd.tcpct_used > 0 || | |
2170 | (tp->cookie_values == NULL && | |
2171 | (sysctl_tcp_cookie_size > 0 || | |
2172 | ctd.tcpct_cookie_desired > 0 || | |
2173 | ctd.tcpct_s_data_desired > 0))) { | |
2174 | cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, | |
2175 | GFP_KERNEL); | |
2176 | if (cvp == NULL) | |
2177 | return -ENOMEM; | |
2178 | kref_init(&cvp->kref); /* only for a fresh allocation, never for the reused object below */ | |
2179 | } | |
2180 | lock_sock(sk); | |
2181 | tp->rx_opt.cookie_in_always = | |
2182 | (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); | |
2183 | tp->rx_opt.cookie_out_never = 0; /* false */ | |
2184 | |
2185 | if (tp->cookie_values != NULL) { | |
2186 | if (cvp != NULL) { | |
2187 | /* Changed values are recorded by a changed pointer, so the | |
2188 | * cookie will differ without separately hashing each value. | |
2189 | * Drop the old object here; cvp is installed below. | |
2190 | */ | |
2191 | kref_put(&tp->cookie_values->kref, | |
2192 | tcp_cookie_values_release); | |
2193 | } else { | |
2194 | cvp = tp->cookie_values; | |
2195 | } | |
2196 | } | |
2197 | if (cvp != NULL) { | |
2198 | cvp->cookie_desired = ctd.tcpct_cookie_desired; | |
2199 | |
2200 | if (ctd.tcpct_used > 0) { | |
2201 | memcpy(cvp->s_data_payload, ctd.tcpct_value, | |
2202 | ctd.tcpct_used); | |
2203 | cvp->s_data_desired = ctd.tcpct_used; | |
2204 | cvp->s_data_constant = 1; /* true */ | |
2205 | } else { | |
2206 | /* No constant payload data. */ | |
2207 | cvp->s_data_desired = ctd.tcpct_s_data_desired; | |
2208 | cvp->s_data_constant = 0; /* false */ | |
2209 | } | |
2210 | tp->cookie_values = cvp; /* install; also covers the first-time case, which previously leaked cvp */ | |
2211 | } | |
2212 | release_sock(sk); | |
2213 | return err; | |
2214 | } | |
2215 | default: | ||
2216 | /* fallthru */ | ||
2217 | break; | ||
2218 | }; | ||
2104 | 2219 | ||
2105 | if (optlen < sizeof(int)) | 2220 | if (optlen < sizeof(int)) |
2106 | return -EINVAL; | 2221 | return -EINVAL; |
@@ -2139,6 +2254,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2139 | } | 2254 | } |
2140 | break; | 2255 | break; |
2141 | 2256 | ||
2257 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
2258 | if (val < 0 || val > 1) | ||
2259 | err = -EINVAL; | ||
2260 | else | ||
2261 | tp->thin_lto = val; | ||
2262 | break; | ||
2263 | |||
2264 | case TCP_THIN_DUPACK: | ||
2265 | if (val < 0 || val > 1) | ||
2266 | err = -EINVAL; | ||
2267 | else | ||
2268 | tp->thin_dupack = val; | ||
2269 | break; | ||
2270 | |||
2142 | case TCP_CORK: | 2271 | case TCP_CORK: |
2143 | /* When set indicates to always queue non-full frames. | 2272 | /* When set indicates to always queue non-full frames. |
2144 | * Later the user clears this option and we transmit | 2273 | * Later the user clears this option and we transmit |
@@ -2425,6 +2554,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2425 | if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) | 2554 | if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) |
2426 | return -EFAULT; | 2555 | return -EFAULT; |
2427 | return 0; | 2556 | return 0; |
2557 | |||
2558 | case TCP_COOKIE_TRANSACTIONS: { /* report the socket's cookie settings as a struct tcp_cookie_transactions */ | |
2559 | struct tcp_cookie_transactions ctd; | |
2560 | struct tcp_cookie_values *cvp = tp->cookie_values; | |
2561 | |
2562 | if (get_user(len, optlen)) | |
2563 | return -EFAULT; | |
2564 | if (len < sizeof(ctd)) /* user buffer must hold the whole struct */ | |
2565 | return -EINVAL; | |
2566 | |
2567 | memset(&ctd, 0, sizeof(ctd)); /* start from an all-zero reply */ | |
2568 | ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? | |
2569 | TCP_COOKIE_IN_ALWAYS : 0) | |
2570 | | (tp->rx_opt.cookie_out_never ? | |
2571 | TCP_COOKIE_OUT_NEVER : 0); | |
2572 | |
2573 | if (cvp != NULL) { | |
2574 | ctd.tcpct_flags |= (cvp->s_data_in ? | |
2575 | TCP_S_DATA_IN : 0) | |
2576 | | (cvp->s_data_out ? | |
2577 | TCP_S_DATA_OUT : 0); | |
2578 | |
2579 | ctd.tcpct_cookie_desired = cvp->cookie_desired; | |
2580 | ctd.tcpct_s_data_desired = cvp->s_data_desired; | |
2581 | |
2582 | memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], /* NOTE(review): no bound check of cookie_pair_size against sizeof(ctd.tcpct_value) is visible here - confirm it is enforced elsewhere */ | |
2583 | cvp->cookie_pair_size); | |
2584 | ctd.tcpct_used = cvp->cookie_pair_size; | |
2585 | } | |
2586 | |
2587 | if (put_user(sizeof(ctd), optlen)) /* tell the caller how many bytes are returned */ | |
2588 | return -EFAULT; | |
2589 | if (copy_to_user(optval, &ctd, sizeof(ctd))) | |
2590 | return -EFAULT; | |
2591 | return 0; | |
2592 | } | |
2428 | default: | 2593 | default: |
2429 | return -ENOPROTOOPT; | 2594 | return -ENOPROTOOPT; |
2430 | } | 2595 | } |
@@ -2662,10 +2827,10 @@ EXPORT_SYMBOL(tcp_gro_complete); | |||
2662 | 2827 | ||
2663 | #ifdef CONFIG_TCP_MD5SIG | 2828 | #ifdef CONFIG_TCP_MD5SIG |
2664 | static unsigned long tcp_md5sig_users; | 2829 | static unsigned long tcp_md5sig_users; |
2665 | static struct tcp_md5sig_pool **tcp_md5sig_pool; | 2830 | static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; |
2666 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); | 2831 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); |
2667 | 2832 | ||
2668 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | 2833 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) |
2669 | { | 2834 | { |
2670 | int cpu; | 2835 | int cpu; |
2671 | for_each_possible_cpu(cpu) { | 2836 | for_each_possible_cpu(cpu) { |
@@ -2674,7 +2839,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | |||
2674 | if (p->md5_desc.tfm) | 2839 | if (p->md5_desc.tfm) |
2675 | crypto_free_hash(p->md5_desc.tfm); | 2840 | crypto_free_hash(p->md5_desc.tfm); |
2676 | kfree(p); | 2841 | kfree(p); |
2677 | p = NULL; | ||
2678 | } | 2842 | } |
2679 | } | 2843 | } |
2680 | free_percpu(pool); | 2844 | free_percpu(pool); |
@@ -2682,7 +2846,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | |||
2682 | 2846 | ||
2683 | void tcp_free_md5sig_pool(void) | 2847 | void tcp_free_md5sig_pool(void) |
2684 | { | 2848 | { |
2685 | struct tcp_md5sig_pool **pool = NULL; | 2849 | struct tcp_md5sig_pool * __percpu *pool = NULL; |
2686 | 2850 | ||
2687 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2851 | spin_lock_bh(&tcp_md5sig_pool_lock); |
2688 | if (--tcp_md5sig_users == 0) { | 2852 | if (--tcp_md5sig_users == 0) { |
@@ -2696,10 +2860,11 @@ void tcp_free_md5sig_pool(void) | |||
2696 | 2860 | ||
2697 | EXPORT_SYMBOL(tcp_free_md5sig_pool); | 2861 | EXPORT_SYMBOL(tcp_free_md5sig_pool); |
2698 | 2862 | ||
2699 | static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) | 2863 | static struct tcp_md5sig_pool * __percpu * |
2864 | __tcp_alloc_md5sig_pool(struct sock *sk) | ||
2700 | { | 2865 | { |
2701 | int cpu; | 2866 | int cpu; |
2702 | struct tcp_md5sig_pool **pool; | 2867 | struct tcp_md5sig_pool * __percpu *pool; |
2703 | 2868 | ||
2704 | pool = alloc_percpu(struct tcp_md5sig_pool *); | 2869 | pool = alloc_percpu(struct tcp_md5sig_pool *); |
2705 | if (!pool) | 2870 | if (!pool) |
@@ -2726,9 +2891,9 @@ out_free: | |||
2726 | return NULL; | 2891 | return NULL; |
2727 | } | 2892 | } |
2728 | 2893 | ||
2729 | struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) | 2894 | struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) |
2730 | { | 2895 | { |
2731 | struct tcp_md5sig_pool **pool; | 2896 | struct tcp_md5sig_pool * __percpu *pool; |
2732 | int alloc = 0; | 2897 | int alloc = 0; |
2733 | 2898 | ||
2734 | retry: | 2899 | retry: |
@@ -2747,7 +2912,9 @@ retry: | |||
2747 | 2912 | ||
2748 | if (alloc) { | 2913 | if (alloc) { |
2749 | /* we cannot hold spinlock here because this may sleep. */ | 2914 | /* we cannot hold spinlock here because this may sleep. */ |
2750 | struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); | 2915 | struct tcp_md5sig_pool * __percpu *p; |
2916 | |||
2917 | p = __tcp_alloc_md5sig_pool(sk); | ||
2751 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2918 | spin_lock_bh(&tcp_md5sig_pool_lock); |
2752 | if (!p) { | 2919 | if (!p) { |
2753 | tcp_md5sig_users--; | 2920 | tcp_md5sig_users--; |
@@ -2769,25 +2936,40 @@ retry: | |||
2769 | 2936 | ||
2770 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); | 2937 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); |
2771 | 2938 | ||
2772 | struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) | 2939 | |
2940 | /** | ||
2941 | * tcp_get_md5sig_pool - get md5sig_pool for this user | ||
2942 | * | ||
2943 | * We use percpu structure, so if we succeed, we exit with preemption | ||
2944 | * and BH disabled, to make sure another thread or softirq handling | ||
2945 | * won't try to get the same context. | |
2946 | */ | ||
2947 | struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | ||
2773 | { | 2948 | { |
2774 | struct tcp_md5sig_pool **p; | 2949 | struct tcp_md5sig_pool * __percpu *p; |
2775 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2950 | |
2951 | local_bh_disable(); | ||
2952 | |||
2953 | spin_lock(&tcp_md5sig_pool_lock); | ||
2776 | p = tcp_md5sig_pool; | 2954 | p = tcp_md5sig_pool; |
2777 | if (p) | 2955 | if (p) |
2778 | tcp_md5sig_users++; | 2956 | tcp_md5sig_users++; |
2779 | spin_unlock_bh(&tcp_md5sig_pool_lock); | 2957 | spin_unlock(&tcp_md5sig_pool_lock); |
2780 | return (p ? *per_cpu_ptr(p, cpu) : NULL); | ||
2781 | } | ||
2782 | 2958 | ||
2783 | EXPORT_SYMBOL(__tcp_get_md5sig_pool); | 2959 | if (p) |
2960 | return *per_cpu_ptr(p, smp_processor_id()); | ||
2961 | |||
2962 | local_bh_enable(); | ||
2963 | return NULL; | ||
2964 | } | ||
2965 | EXPORT_SYMBOL(tcp_get_md5sig_pool); | ||
2784 | 2966 | ||
2785 | void __tcp_put_md5sig_pool(void) | 2967 | void tcp_put_md5sig_pool(void) |
2786 | { | 2968 | { |
2969 | local_bh_enable(); | ||
2787 | tcp_free_md5sig_pool(); | 2970 | tcp_free_md5sig_pool(); |
2788 | } | 2971 | } |
2789 | 2972 | EXPORT_SYMBOL(tcp_put_md5sig_pool); | |
2790 | EXPORT_SYMBOL(__tcp_put_md5sig_pool); | ||
2791 | 2973 | ||
2792 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | 2974 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, |
2793 | struct tcphdr *th) | 2975 | struct tcphdr *th) |
@@ -2847,6 +3029,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key); | |||
2847 | 3029 | ||
2848 | #endif | 3030 | #endif |
2849 | 3031 | ||
3032 | /** | ||
3033 | * Each Responder maintains up to two secret values concurrently for | ||
3034 | * efficient secret rollover. Each secret value has 4 states: | ||
3035 | * | ||
3036 | * Generating. (tcp_secret_generating != tcp_secret_primary) | ||
3037 | * Generates new Responder-Cookies, but not yet used for primary | ||
3038 | * verification. This is a short-term state, typically lasting only | ||
3039 | * one round trip time (RTT). | ||
3040 | * | ||
3041 | * Primary. (tcp_secret_generating == tcp_secret_primary) | ||
3042 | * Used both for generation and primary verification. | ||
3043 | * | ||
3044 | * Retiring. (tcp_secret_retiring != tcp_secret_secondary) | ||
3045 | * Used for verification, until the first failure that can be | ||
3046 | * verified by the newer Generating secret. At that time, this | ||
3047 | * cookie's state is changed to Secondary, and the Generating | ||
3048 | * cookie's state is changed to Primary. This is a short-term state, | ||
3049 | * typically lasting only one round trip time (RTT). | ||
3050 | * | ||
3051 | * Secondary. (tcp_secret_retiring == tcp_secret_secondary) | ||
3052 | * Used for secondary verification, after primary verification | ||
3053 | * failures. This state lasts no more than twice the Maximum Segment | ||
3054 | * Lifetime (2MSL). Then, the secret is discarded. | ||
3055 | */ | ||
3056 | struct tcp_cookie_secret { /* one secret slot: the secret words plus the time it falls due */ | |
3057 | /* The secret is divided into two parts. The digest part is the | |
3058 | * equivalent of previously hashing a secret and saving the state, | |
3059 | * and serves as an initialization vector (IV). The message part | |
3060 | * serves as the trailing secret. | |
3061 | */ | |
3062 | u32 secrets[COOKIE_WORKSPACE_WORDS]; | |
3063 | unsigned long expires; /* jiffies value; tcp_cookie_generator() refreshes once jiffies reaches it */ | |
3064 | }; | |
3065 | |||
3066 | #define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) /* expiry offset used for the very first secret */ | |
3067 | #define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) /* expiry offset given to the outgoing primary on rollover */ | |
3068 | #define TCP_SECRET_LIFE (HZ * 600) /* expiry offset given to a newly generated secret on rollover */ | |
3069 | |
3070 | static struct tcp_cookie_secret tcp_secret_one; /* backing storage, slot one */ | |
3071 | static struct tcp_cookie_secret tcp_secret_two; /* backing storage, slot two */ | |
3072 | |
3073 | /* Essentially a circular list, without dynamic allocation. */ | |
3074 | static struct tcp_cookie_secret *tcp_secret_generating; | |
3075 | static struct tcp_cookie_secret *tcp_secret_primary; | |
3076 | static struct tcp_cookie_secret *tcp_secret_retiring; | |
3077 | static struct tcp_cookie_secret *tcp_secret_secondary; | |
3078 | |
3079 | static DEFINE_SPINLOCK(tcp_secret_locker); /* taken in tcp_cookie_generator() around secret rollover */ | |
3080 | |||
3081 | /* Select a pseudo-random word from the message part of workspace @ws: | |
3082 | * word @n of @ws, masked with COOKIE_MESSAGE_WORDS-1, picks the slot. */ | |
3083 | static inline u32 tcp_cookie_work(const u32 *ws, const int n) | |
3084 | { | |
3085 | return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; | |
3086 | } | |
3087 | |||
3088 | /* Copy the current generating secret into bakery[COOKIE_WORKSPACE_WORDS], | |
3089 | * first rolling over to a freshly randomized secret if it has expired. | |
3090 | * Called in softirq context. Returns: 0 for success (no failure path). | |
3091 | */ | |
3092 | int tcp_cookie_generator(u32 *bakery) | |
3093 | { | |
3094 | unsigned long jiffy = jiffies; | |
3095 | |
3096 | if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { | |
3097 | spin_lock_bh(&tcp_secret_locker); | |
3098 | if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { | |
3099 | /* refreshed by another */ | |
3100 | memcpy(bakery, | |
3101 | &tcp_secret_generating->secrets[0], | |
3102 | COOKIE_WORKSPACE_WORDS * sizeof(*bakery)); /* length is in bytes, not words */ | |
3103 | } else { | |
3104 | /* still needs refreshing */ | |
3105 | get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS * sizeof(*bakery)); /* randomize the whole workspace */ | |
3106 | |
3107 | /* The first time, paranoia assumes that the | |
3108 | * randomization function isn't as strong. But, | |
3109 | * this secret initialization is delayed until | |
3110 | * the last possible moment (packet arrival). | |
3111 | * Although that time is observable, it is | |
3112 | * unpredictably variable. Mash in the most | |
3113 | * volatile clock bits available, and expire the | |
3114 | * secret extra quickly. | |
3115 | */ | |
3116 | if (unlikely(tcp_secret_primary->expires == | |
3117 | tcp_secret_secondary->expires)) { | |
3118 | struct timespec tv; | |
3119 | |
3120 | getnstimeofday(&tv); | |
3121 | bakery[COOKIE_DIGEST_WORDS+0] ^= | |
3122 | (u32)tv.tv_nsec; | |
3123 | |
3124 | tcp_secret_secondary->expires = jiffy | |
3125 | + TCP_SECRET_1MSL | |
3126 | + (0x0f & tcp_cookie_work(bakery, 0)); | |
3127 | } else { | |
3128 | tcp_secret_secondary->expires = jiffy | |
3129 | + TCP_SECRET_LIFE | |
3130 | + (0xff & tcp_cookie_work(bakery, 1)); | |
3131 | tcp_secret_primary->expires = jiffy | |
3132 | + TCP_SECRET_2MSL | |
3133 | + (0x1f & tcp_cookie_work(bakery, 2)); | |
3134 | } | |
3135 | memcpy(&tcp_secret_secondary->secrets[0], | |
3136 | bakery, COOKIE_WORKSPACE_WORDS * sizeof(*bakery)); | |
3137 | |
3138 | rcu_assign_pointer(tcp_secret_generating, | |
3139 | tcp_secret_secondary); | |
3140 | rcu_assign_pointer(tcp_secret_retiring, | |
3141 | tcp_secret_primary); | |
3142 | /* | |
3143 | * Neither call_rcu() nor synchronize_rcu() needed. | |
3144 | * Retiring data is not freed. It is replaced after | |
3145 | * further (locked) pointer updates, and a quiet time | |
3146 | * (minimum 1MSL, maximum LIFE - 2MSL). | |
3147 | */ | |
3148 | } | |
3149 | spin_unlock_bh(&tcp_secret_locker); | |
3150 | } else { | |
3151 | rcu_read_lock_bh(); | |
3152 | memcpy(bakery, | |
3153 | &rcu_dereference(tcp_secret_generating)->secrets[0], | |
3154 | COOKIE_WORKSPACE_WORDS * sizeof(*bakery)); /* length is in bytes, not words */ | |
3155 | rcu_read_unlock_bh(); | |
3156 | } | |
3157 | return 0; | |
3158 | } | |
3159 | EXPORT_SYMBOL(tcp_cookie_generator); | ||
3160 | |||
2850 | void tcp_done(struct sock *sk) | 3161 | void tcp_done(struct sock *sk) |
2851 | { | 3162 | { |
2852 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | 3163 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) |
@@ -2881,6 +3192,7 @@ void __init tcp_init(void) | |||
2881 | struct sk_buff *skb = NULL; | 3192 | struct sk_buff *skb = NULL; |
2882 | unsigned long nr_pages, limit; | 3193 | unsigned long nr_pages, limit; |
2883 | int order, i, max_share; | 3194 | int order, i, max_share; |
3195 | unsigned long jiffy = jiffies; | ||
2884 | 3196 | ||
2885 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3197 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
2886 | 3198 | ||
@@ -2903,11 +3215,10 @@ void __init tcp_init(void) | |||
2903 | (totalram_pages >= 128 * 1024) ? | 3215 | (totalram_pages >= 128 * 1024) ? |
2904 | 13 : 15, | 3216 | 13 : 15, |
2905 | 0, | 3217 | 0, |
2906 | &tcp_hashinfo.ehash_size, | ||
2907 | NULL, | 3218 | NULL, |
3219 | &tcp_hashinfo.ehash_mask, | ||
2908 | thash_entries ? 0 : 512 * 1024); | 3220 | thash_entries ? 0 : 512 * 1024); |
2909 | tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; | 3221 | for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { |
2910 | for (i = 0; i < tcp_hashinfo.ehash_size; i++) { | ||
2911 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); | 3222 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); |
2912 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); | 3223 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); |
2913 | } | 3224 | } |
@@ -2916,7 +3227,7 @@ void __init tcp_init(void) | |||
2916 | tcp_hashinfo.bhash = | 3227 | tcp_hashinfo.bhash = |
2917 | alloc_large_system_hash("TCP bind", | 3228 | alloc_large_system_hash("TCP bind", |
2918 | sizeof(struct inet_bind_hashbucket), | 3229 | sizeof(struct inet_bind_hashbucket), |
2919 | tcp_hashinfo.ehash_size, | 3230 | tcp_hashinfo.ehash_mask + 1, |
2920 | (totalram_pages >= 128 * 1024) ? | 3231 | (totalram_pages >= 128 * 1024) ? |
2921 | 13 : 15, | 3232 | 13 : 15, |
2922 | 0, | 3233 | 0, |
@@ -2971,10 +3282,19 @@ void __init tcp_init(void) | |||
2971 | sysctl_tcp_rmem[2] = max(87380, max_share); | 3282 | sysctl_tcp_rmem[2] = max(87380, max_share); |
2972 | 3283 | ||
2973 | printk(KERN_INFO "TCP: Hash tables configured " | 3284 | printk(KERN_INFO "TCP: Hash tables configured " |
2974 | "(established %d bind %d)\n", | 3285 | "(established %u bind %u)\n", |
2975 | tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); | 3286 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); |
2976 | 3287 | ||
2977 | tcp_register_congestion_control(&tcp_reno); | 3288 | tcp_register_congestion_control(&tcp_reno); |
3289 | |||
3290 | memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); | ||
3291 | memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); | ||
3292 | tcp_secret_one.expires = jiffy; /* past due */ | ||
3293 | tcp_secret_two.expires = jiffy; /* past due */ | ||
3294 | tcp_secret_generating = &tcp_secret_one; | ||
3295 | tcp_secret_primary = &tcp_secret_one; | ||
3296 | tcp_secret_retiring = &tcp_secret_two; | ||
3297 | tcp_secret_secondary = &tcp_secret_two; | ||
2978 | } | 3298 | } |
2979 | 3299 | ||
2980 | EXPORT_SYMBOL(tcp_close); | 3300 | EXPORT_SYMBOL(tcp_close); |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6428b342b164..0ec9bd0ae94f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
13 | #include <linux/gfp.h> | ||
13 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
14 | 15 | ||
15 | int sysctl_tcp_max_ssthresh = 0; | 16 | int sysctl_tcp_max_ssthresh = 0; |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index fcbcd4ff6c5f..939edb3b8e4d 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
@@ -27,7 +27,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | |||
27 | r->idiag_rqueue = sk->sk_ack_backlog; | 27 | r->idiag_rqueue = sk->sk_ack_backlog; |
28 | r->idiag_wqueue = sk->sk_max_ack_backlog; | 28 | r->idiag_wqueue = sk->sk_max_ack_backlog; |
29 | } else { | 29 | } else { |
30 | r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; | 30 | r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); |
31 | r->idiag_wqueue = tp->write_seq - tp->snd_una; | 31 | r->idiag_wqueue = tp->write_seq - tp->snd_una; |
32 | } | 32 | } |
33 | if (info != NULL) | 33 | if (info != NULL) |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 26d5c7fc7de5..7c94a4955416 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -92,8 +92,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt) | |||
92 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 92 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
93 | if (ca->maxRTT < ca->minRTT) | 93 | if (ca->maxRTT < ca->minRTT) |
94 | ca->maxRTT = ca->minRTT; | 94 | ca->maxRTT = ca->minRTT; |
95 | if (ca->maxRTT < srtt | 95 | if (ca->maxRTT < srtt && |
96 | && srtt <= ca->maxRTT + msecs_to_jiffies(20)) | 96 | srtt <= ca->maxRTT + msecs_to_jiffies(20)) |
97 | ca->maxRTT = srtt; | 97 | ca->maxRTT = srtt; |
98 | } | 98 | } |
99 | } | 99 | } |
@@ -123,9 +123,9 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt | |||
123 | 123 | ||
124 | ca->packetcount += pkts_acked; | 124 | ca->packetcount += pkts_acked; |
125 | 125 | ||
126 | if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) | 126 | if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && |
127 | && now - ca->lasttime >= ca->minRTT | 127 | now - ca->lasttime >= ca->minRTT && |
128 | && ca->minRTT > 0) { | 128 | ca->minRTT > 0) { |
129 | __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime); | 129 | __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime); |
130 | 130 | ||
131 | if (htcp_ccount(ca) <= 3) { | 131 | if (htcp_ccount(ca) <= 3) { |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d86784be7ab3..f240f57b2199 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -62,6 +62,7 @@ | |||
62 | */ | 62 | */ |
63 | 63 | ||
64 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
65 | #include <linux/slab.h> | ||
65 | #include <linux/module.h> | 66 | #include <linux/module.h> |
66 | #include <linux/sysctl.h> | 67 | #include <linux/sysctl.h> |
67 | #include <linux/kernel.h> | 68 | #include <linux/kernel.h> |
@@ -89,6 +90,8 @@ int sysctl_tcp_frto __read_mostly = 2; | |||
89 | int sysctl_tcp_frto_response __read_mostly; | 90 | int sysctl_tcp_frto_response __read_mostly; |
90 | int sysctl_tcp_nometrics_save __read_mostly; | 91 | int sysctl_tcp_nometrics_save __read_mostly; |
91 | 92 | ||
93 | int sysctl_tcp_thin_dupack __read_mostly; | ||
94 | |||
92 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 95 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
93 | int sysctl_tcp_abc __read_mostly; | 96 | int sysctl_tcp_abc __read_mostly; |
94 | 97 | ||
@@ -140,7 +143,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) | |||
140 | * "len" is invariant segment length, including TCP header. | 143 | * "len" is invariant segment length, including TCP header. |
141 | */ | 144 | */ |
142 | len += skb->data - skb_transport_header(skb); | 145 | len += skb->data - skb_transport_header(skb); |
143 | if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || | 146 | if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) || |
144 | /* If PSH is not set, packet should be | 147 | /* If PSH is not set, packet should be |
145 | * full sized, provided peer TCP is not badly broken. | 148 | * full sized, provided peer TCP is not badly broken. |
146 | * This observation (if it is correct 8)) allows | 149 | * This observation (if it is correct 8)) allows |
@@ -411,7 +414,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) | |||
411 | unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); | 414 | unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); |
412 | 415 | ||
413 | hint = min(hint, tp->rcv_wnd / 2); | 416 | hint = min(hint, tp->rcv_wnd / 2); |
414 | hint = min(hint, TCP_MIN_RCVMSS); | 417 | hint = min(hint, TCP_MSS_DEFAULT); |
415 | hint = max(hint, TCP_MIN_MSS); | 418 | hint = max(hint, TCP_MIN_MSS); |
416 | 419 | ||
417 | inet_csk(sk)->icsk_ack.rcv_mss = hint; | 420 | inet_csk(sk)->icsk_ack.rcv_mss = hint; |
@@ -2300,7 +2303,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) | |||
2300 | * they differ. Since neither occurs due to loss, TCP should really | 2303 | * they differ. Since neither occurs due to loss, TCP should really |
2301 | * ignore them. | 2304 | * ignore them. |
2302 | */ | 2305 | */ |
2303 | static inline int tcp_dupack_heurestics(struct tcp_sock *tp) | 2306 | static inline int tcp_dupack_heuristics(struct tcp_sock *tp) |
2304 | { | 2307 | { |
2305 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; | 2308 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; |
2306 | } | 2309 | } |
@@ -2425,7 +2428,7 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2425 | return 1; | 2428 | return 1; |
2426 | 2429 | ||
2427 | /* Not-A-Trick#2 : Classic rule... */ | 2430 | /* Not-A-Trick#2 : Classic rule... */ |
2428 | if (tcp_dupack_heurestics(tp) > tp->reordering) | 2431 | if (tcp_dupack_heuristics(tp) > tp->reordering) |
2429 | return 1; | 2432 | return 1; |
2430 | 2433 | ||
2431 | /* Trick#3 : when we use RFC2988 timer restart, fast | 2434 | /* Trick#3 : when we use RFC2988 timer restart, fast |
@@ -2447,6 +2450,16 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2447 | return 1; | 2450 | return 1; |
2448 | } | 2451 | } |
2449 | 2452 | ||
2453 | /* If a thin stream is detected, retransmit after first | ||
2454 | * received dupack. Employ only if SACK is supported in order | ||
2455 | * to avoid possible corner-case series of spurious retransmissions. | |
2456 | * Use only if there are no unsent data. | ||
2457 | */ | ||
2458 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | ||
2459 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | ||
2460 | tcp_is_sack(tp) && !tcp_send_head(sk)) | ||
2461 | return 1; | ||
2462 | |||
2450 | return 0; | 2463 | return 0; |
2451 | } | 2464 | } |
2452 | 2465 | ||
@@ -2499,6 +2512,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
2499 | int err; | 2512 | int err; |
2500 | unsigned int mss; | 2513 | unsigned int mss; |
2501 | 2514 | ||
2515 | if (packets == 0) | ||
2516 | return; | ||
2517 | |||
2502 | WARN_ON(packets > tp->packets_out); | 2518 | WARN_ON(packets > tp->packets_out); |
2503 | if (tp->lost_skb_hint) { | 2519 | if (tp->lost_skb_hint) { |
2504 | skb = tp->lost_skb_hint; | 2520 | skb = tp->lost_skb_hint; |
@@ -2717,6 +2733,35 @@ static void tcp_try_undo_dsack(struct sock *sk) | |||
2717 | } | 2733 | } |
2718 | } | 2734 | } |
2719 | 2735 | ||
2736 | /* We can clear retrans_stamp when there are no retransmissions in the | ||
2737 | * window. It would seem that it is trivially available for us in | ||
2738 | * tp->retrans_out, however, that kind of assumption doesn't consider | |
2739 | * what will happen if errors occur when sending retransmission for the | |
2740 | * second time. ...It could be that such segment has only | |
2741 | * TCPCB_EVER_RETRANS set at the present time. It seems that checking | ||
2742 | * the head skb is enough except for some reneging corner cases that | ||
2743 | * are not worth the effort. | ||
2744 | * | ||
2745 | * Main reason for all this complexity is the fact that connection dying | ||
2746 | * time now depends on the validity of the retrans_stamp, in particular, | ||
2747 | * that successive retransmissions of a segment must not advance | ||
2748 | * retrans_stamp under any conditions. | ||
2749 | */ | ||
2750 | static int tcp_any_retrans_done(struct sock *sk) | ||
2751 | { | ||
2752 | struct tcp_sock *tp = tcp_sk(sk); | ||
2753 | struct sk_buff *skb; | ||
2754 | |||
2755 | if (tp->retrans_out) | ||
2756 | return 1; | ||
2757 | |||
2758 | skb = tcp_write_queue_head(sk); | ||
2759 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) | ||
2760 | return 1; | ||
2761 | |||
2762 | return 0; | ||
2763 | } | ||
2764 | |||
2720 | /* Undo during fast recovery after partial ACK. */ | 2765 | /* Undo during fast recovery after partial ACK. */ |
2721 | 2766 | ||
2722 | static int tcp_try_undo_partial(struct sock *sk, int acked) | 2767 | static int tcp_try_undo_partial(struct sock *sk, int acked) |
@@ -2729,7 +2774,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) | |||
2729 | /* Plain luck! Hole is filled with delayed | 2774 | /* Plain luck! Hole is filled with delayed |
2730 | * packet, rather than with a retransmit. | 2775 | * packet, rather than with a retransmit. |
2731 | */ | 2776 | */ |
2732 | if (tp->retrans_out == 0) | 2777 | if (!tcp_any_retrans_done(sk)) |
2733 | tp->retrans_stamp = 0; | 2778 | tp->retrans_stamp = 0; |
2734 | 2779 | ||
2735 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); | 2780 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); |
@@ -2788,7 +2833,7 @@ static void tcp_try_keep_open(struct sock *sk) | |||
2788 | struct tcp_sock *tp = tcp_sk(sk); | 2833 | struct tcp_sock *tp = tcp_sk(sk); |
2789 | int state = TCP_CA_Open; | 2834 | int state = TCP_CA_Open; |
2790 | 2835 | ||
2791 | if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) | 2836 | if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) |
2792 | state = TCP_CA_Disorder; | 2837 | state = TCP_CA_Disorder; |
2793 | 2838 | ||
2794 | if (inet_csk(sk)->icsk_ca_state != state) { | 2839 | if (inet_csk(sk)->icsk_ca_state != state) { |
@@ -2803,7 +2848,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) | |||
2803 | 2848 | ||
2804 | tcp_verify_left_out(tp); | 2849 | tcp_verify_left_out(tp); |
2805 | 2850 | ||
2806 | if (!tp->frto_counter && tp->retrans_out == 0) | 2851 | if (!tp->frto_counter && !tcp_any_retrans_done(sk)) |
2807 | tp->retrans_stamp = 0; | 2852 | tp->retrans_stamp = 0; |
2808 | 2853 | ||
2809 | if (flag & FLAG_ECE) | 2854 | if (flag & FLAG_ECE) |
@@ -3698,7 +3743,7 @@ old_ack: | |||
3698 | * the fast version below fails. | 3743 | * the fast version below fails. |
3699 | */ | 3744 | */ |
3700 | void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | 3745 | void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, |
3701 | int estab) | 3746 | u8 **hvpp, int estab) |
3702 | { | 3747 | { |
3703 | unsigned char *ptr; | 3748 | unsigned char *ptr; |
3704 | struct tcphdr *th = tcp_hdr(skb); | 3749 | struct tcphdr *th = tcp_hdr(skb); |
@@ -3782,7 +3827,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
3782 | */ | 3827 | */ |
3783 | break; | 3828 | break; |
3784 | #endif | 3829 | #endif |
3785 | } | 3830 | case TCPOPT_COOKIE: |
3831 | /* This option is variable length. | ||
3832 | */ | ||
3833 | switch (opsize) { | ||
3834 | case TCPOLEN_COOKIE_BASE: | ||
3835 | /* not yet implemented */ | ||
3836 | break; | ||
3837 | case TCPOLEN_COOKIE_PAIR: | ||
3838 | /* not yet implemented */ | ||
3839 | break; | ||
3840 | case TCPOLEN_COOKIE_MIN+0: | ||
3841 | case TCPOLEN_COOKIE_MIN+2: | ||
3842 | case TCPOLEN_COOKIE_MIN+4: | ||
3843 | case TCPOLEN_COOKIE_MIN+6: | ||
3844 | case TCPOLEN_COOKIE_MAX: | ||
3845 | /* 16-bit multiple */ | ||
3846 | opt_rx->cookie_plus = opsize; | ||
3847 | *hvpp = ptr; | ||
3848 | default: | ||
3849 | /* ignore option */ | ||
3850 | break; | ||
3851 | }; | ||
3852 | break; | ||
3853 | }; | ||
3786 | 3854 | ||
3787 | ptr += opsize-2; | 3855 | ptr += opsize-2; |
3788 | length -= opsize; | 3856 | length -= opsize; |
@@ -3810,17 +3878,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | |||
3810 | * If it is wrong it falls back on tcp_parse_options(). | 3878 | * If it is wrong it falls back on tcp_parse_options(). |
3811 | */ | 3879 | */ |
3812 | static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | 3880 | static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, |
3813 | struct tcp_sock *tp) | 3881 | struct tcp_sock *tp, u8 **hvpp) |
3814 | { | 3882 | { |
3815 | if (th->doff == sizeof(struct tcphdr) >> 2) { | 3883 | /* In the spirit of fast parsing, compare doff directly to constant |
3884 | * values. Because equality is used, short doff can be ignored here. | ||
3885 | */ | ||
3886 | if (th->doff == (sizeof(*th) / 4)) { | ||
3816 | tp->rx_opt.saw_tstamp = 0; | 3887 | tp->rx_opt.saw_tstamp = 0; |
3817 | return 0; | 3888 | return 0; |
3818 | } else if (tp->rx_opt.tstamp_ok && | 3889 | } else if (tp->rx_opt.tstamp_ok && |
3819 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { | 3890 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { |
3820 | if (tcp_parse_aligned_timestamp(tp, th)) | 3891 | if (tcp_parse_aligned_timestamp(tp, th)) |
3821 | return 1; | 3892 | return 1; |
3822 | } | 3893 | } |
3823 | tcp_parse_options(skb, &tp->rx_opt, 1); | 3894 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); |
3824 | return 1; | 3895 | return 1; |
3825 | } | 3896 | } |
3826 | 3897 | ||
@@ -4845,11 +4916,11 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) | |||
4845 | struct tcp_sock *tp = tcp_sk(sk); | 4916 | struct tcp_sock *tp = tcp_sk(sk); |
4846 | 4917 | ||
4847 | /* More than one full frame received... */ | 4918 | /* More than one full frame received... */ |
4848 | if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss | 4919 | if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && |
4849 | /* ... and right edge of window advances far enough. | 4920 | /* ... and right edge of window advances far enough. |
4850 | * (tcp_recvmsg() will send ACK otherwise). Or... | 4921 | * (tcp_recvmsg() will send ACK otherwise). Or... |
4851 | */ | 4922 | */ |
4852 | && __tcp_select_window(sk) >= tp->rcv_wnd) || | 4923 | __tcp_select_window(sk) >= tp->rcv_wnd) || |
4853 | /* We ACK each frame or... */ | 4924 | /* We ACK each frame or... */ |
4854 | tcp_in_quickack_mode(sk) || | 4925 | tcp_in_quickack_mode(sk) || |
4855 | /* We have out of order data. */ | 4926 | /* We have out of order data. */ |
@@ -5070,10 +5141,12 @@ out: | |||
5070 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | 5141 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, |
5071 | struct tcphdr *th, int syn_inerr) | 5142 | struct tcphdr *th, int syn_inerr) |
5072 | { | 5143 | { |
5144 | u8 *hash_location; | ||
5073 | struct tcp_sock *tp = tcp_sk(sk); | 5145 | struct tcp_sock *tp = tcp_sk(sk); |
5074 | 5146 | ||
5075 | /* RFC1323: H1. Apply PAWS check first. */ | 5147 | /* RFC1323: H1. Apply PAWS check first. */ |
5076 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 5148 | if (tcp_fast_parse_options(skb, th, tp, &hash_location) && |
5149 | tp->rx_opt.saw_tstamp && | ||
5077 | tcp_paws_discard(sk, skb)) { | 5150 | tcp_paws_discard(sk, skb)) { |
5078 | if (!th->rst) { | 5151 | if (!th->rst) { |
5079 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | 5152 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); |
@@ -5361,11 +5434,13 @@ discard: | |||
5361 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5434 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5362 | struct tcphdr *th, unsigned len) | 5435 | struct tcphdr *th, unsigned len) |
5363 | { | 5436 | { |
5364 | struct tcp_sock *tp = tcp_sk(sk); | 5437 | u8 *hash_location; |
5365 | struct inet_connection_sock *icsk = inet_csk(sk); | 5438 | struct inet_connection_sock *icsk = inet_csk(sk); |
5439 | struct tcp_sock *tp = tcp_sk(sk); | ||
5440 | struct tcp_cookie_values *cvp = tp->cookie_values; | ||
5366 | int saved_clamp = tp->rx_opt.mss_clamp; | 5441 | int saved_clamp = tp->rx_opt.mss_clamp; |
5367 | 5442 | ||
5368 | tcp_parse_options(skb, &tp->rx_opt, 0); | 5443 | tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); |
5369 | 5444 | ||
5370 | if (th->ack) { | 5445 | if (th->ack) { |
5371 | /* rfc793: | 5446 | /* rfc793: |
@@ -5462,6 +5537,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5462 | * Change state from SYN-SENT only after copied_seq | 5537 | * Change state from SYN-SENT only after copied_seq |
5463 | * is initialized. */ | 5538 | * is initialized. */ |
5464 | tp->copied_seq = tp->rcv_nxt; | 5539 | tp->copied_seq = tp->rcv_nxt; |
5540 | |||
5541 | if (cvp != NULL && | ||
5542 | cvp->cookie_pair_size > 0 && | ||
5543 | tp->rx_opt.cookie_plus > 0) { | ||
5544 | int cookie_size = tp->rx_opt.cookie_plus | ||
5545 | - TCPOLEN_COOKIE_BASE; | ||
5546 | int cookie_pair_size = cookie_size | ||
5547 | + cvp->cookie_desired; | ||
5548 | |||
5549 | /* A cookie extension option was sent and returned. | ||
5550 | * Note that each incoming SYNACK replaces the | ||
5551 | * Responder cookie. The initial exchange is most | ||
5552 | * fragile, as protection against spoofing relies | ||
5553 | * entirely upon the sequence and timestamp (above). | ||
5554 | * This replacement strategy allows the correct pair to | ||
5555 | * pass through, while any others will be filtered via | ||
5556 | * Responder verification later. | ||
5557 | */ | ||
5558 | if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { | ||
5559 | memcpy(&cvp->cookie_pair[cvp->cookie_desired], | ||
5560 | hash_location, cookie_size); | ||
5561 | cvp->cookie_pair_size = cookie_pair_size; | ||
5562 | } | ||
5563 | } | ||
5564 | |||
5465 | smp_mb(); | 5565 | smp_mb(); |
5466 | tcp_set_state(sk, TCP_ESTABLISHED); | 5566 | tcp_set_state(sk, TCP_ESTABLISHED); |
5467 | 5567 | ||
@@ -5699,11 +5799,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5699 | 5799 | ||
5700 | /* tcp_ack considers this ACK as duplicate | 5800 | /* tcp_ack considers this ACK as duplicate |
5701 | * and does not calculate rtt. | 5801 | * and does not calculate rtt. |
5702 | * Fix it at least with timestamps. | 5802 | * Force it here. |
5703 | */ | 5803 | */ |
5704 | if (tp->rx_opt.saw_tstamp && | 5804 | tcp_ack_update_rtt(sk, 0, 0); |
5705 | tp->rx_opt.rcv_tsecr && !tp->srtt) | ||
5706 | tcp_ack_saw_tstamp(sk, 0); | ||
5707 | 5805 | ||
5708 | if (tp->rx_opt.tstamp_ok) | 5806 | if (tp->rx_opt.tstamp_ok) |
5709 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5807 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7cda24b53f61..3c23e70885f4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <linux/jhash.h> | 60 | #include <linux/jhash.h> |
61 | #include <linux/init.h> | 61 | #include <linux/init.h> |
62 | #include <linux/times.h> | 62 | #include <linux/times.h> |
63 | #include <linux/slab.h> | ||
63 | 64 | ||
64 | #include <net/net_namespace.h> | 65 | #include <net/net_namespace.h> |
65 | #include <net/icmp.h> | 66 | #include <net/icmp.h> |
@@ -165,10 +166,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
165 | nexthop = inet->opt->faddr; | 166 | nexthop = inet->opt->faddr; |
166 | } | 167 | } |
167 | 168 | ||
168 | tmp = ip_route_connect(&rt, nexthop, inet->saddr, | 169 | tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, |
169 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | 170 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
170 | IPPROTO_TCP, | 171 | IPPROTO_TCP, |
171 | inet->sport, usin->sin_port, sk, 1); | 172 | inet->inet_sport, usin->sin_port, sk, 1); |
172 | if (tmp < 0) { | 173 | if (tmp < 0) { |
173 | if (tmp == -ENETUNREACH) | 174 | if (tmp == -ENETUNREACH) |
174 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 175 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
@@ -183,11 +184,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
183 | if (!inet->opt || !inet->opt->srr) | 184 | if (!inet->opt || !inet->opt->srr) |
184 | daddr = rt->rt_dst; | 185 | daddr = rt->rt_dst; |
185 | 186 | ||
186 | if (!inet->saddr) | 187 | if (!inet->inet_saddr) |
187 | inet->saddr = rt->rt_src; | 188 | inet->inet_saddr = rt->rt_src; |
188 | inet->rcv_saddr = inet->saddr; | 189 | inet->inet_rcv_saddr = inet->inet_saddr; |
189 | 190 | ||
190 | if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { | 191 | if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { |
191 | /* Reset inherited state */ | 192 | /* Reset inherited state */ |
192 | tp->rx_opt.ts_recent = 0; | 193 | tp->rx_opt.ts_recent = 0; |
193 | tp->rx_opt.ts_recent_stamp = 0; | 194 | tp->rx_opt.ts_recent_stamp = 0; |
@@ -204,20 +205,20 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
204 | * when trying new connection. | 205 | * when trying new connection. |
205 | */ | 206 | */ |
206 | if (peer != NULL && | 207 | if (peer != NULL && |
207 | peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { | 208 | (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { |
208 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | 209 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; |
209 | tp->rx_opt.ts_recent = peer->tcp_ts; | 210 | tp->rx_opt.ts_recent = peer->tcp_ts; |
210 | } | 211 | } |
211 | } | 212 | } |
212 | 213 | ||
213 | inet->dport = usin->sin_port; | 214 | inet->inet_dport = usin->sin_port; |
214 | inet->daddr = daddr; | 215 | inet->inet_daddr = daddr; |
215 | 216 | ||
216 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 217 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
217 | if (inet->opt) | 218 | if (inet->opt) |
218 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; | 219 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; |
219 | 220 | ||
220 | tp->rx_opt.mss_clamp = 536; | 221 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; |
221 | 222 | ||
222 | /* Socket identity is still unknown (sport may be zero). | 223 | /* Socket identity is still unknown (sport may be zero). |
223 | * However we set state to SYN-SENT and not releasing socket | 224 | * However we set state to SYN-SENT and not releasing socket |
@@ -230,7 +231,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
230 | goto failure; | 231 | goto failure; |
231 | 232 | ||
232 | err = ip_route_newports(&rt, IPPROTO_TCP, | 233 | err = ip_route_newports(&rt, IPPROTO_TCP, |
233 | inet->sport, inet->dport, sk); | 234 | inet->inet_sport, inet->inet_dport, sk); |
234 | if (err) | 235 | if (err) |
235 | goto failure; | 236 | goto failure; |
236 | 237 | ||
@@ -239,12 +240,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
239 | sk_setup_caps(sk, &rt->u.dst); | 240 | sk_setup_caps(sk, &rt->u.dst); |
240 | 241 | ||
241 | if (!tp->write_seq) | 242 | if (!tp->write_seq) |
242 | tp->write_seq = secure_tcp_sequence_number(inet->saddr, | 243 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
243 | inet->daddr, | 244 | inet->inet_daddr, |
244 | inet->sport, | 245 | inet->inet_sport, |
245 | usin->sin_port); | 246 | usin->sin_port); |
246 | 247 | ||
247 | inet->id = tp->write_seq ^ jiffies; | 248 | inet->inet_id = tp->write_seq ^ jiffies; |
248 | 249 | ||
249 | err = tcp_connect(sk); | 250 | err = tcp_connect(sk); |
250 | rt = NULL; | 251 | rt = NULL; |
@@ -261,7 +262,7 @@ failure: | |||
261 | tcp_set_state(sk, TCP_CLOSE); | 262 | tcp_set_state(sk, TCP_CLOSE); |
262 | ip_rt_put(rt); | 263 | ip_rt_put(rt); |
263 | sk->sk_route_caps = 0; | 264 | sk->sk_route_caps = 0; |
264 | inet->dport = 0; | 265 | inet->inet_dport = 0; |
265 | return err; | 266 | return err; |
266 | } | 267 | } |
267 | 268 | ||
@@ -370,6 +371,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
370 | if (sk->sk_state == TCP_CLOSE) | 371 | if (sk->sk_state == TCP_CLOSE) |
371 | goto out; | 372 | goto out; |
372 | 373 | ||
374 | if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { | ||
375 | NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); | ||
376 | goto out; | ||
377 | } | ||
378 | |||
373 | icsk = inet_csk(sk); | 379 | icsk = inet_csk(sk); |
374 | tp = tcp_sk(sk); | 380 | tp = tcp_sk(sk); |
375 | seq = ntohl(th->seq); | 381 | seq = ntohl(th->seq); |
@@ -520,12 +526,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | |||
520 | struct tcphdr *th = tcp_hdr(skb); | 526 | struct tcphdr *th = tcp_hdr(skb); |
521 | 527 | ||
522 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 528 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
523 | th->check = ~tcp_v4_check(len, inet->saddr, | 529 | th->check = ~tcp_v4_check(len, inet->inet_saddr, |
524 | inet->daddr, 0); | 530 | inet->inet_daddr, 0); |
525 | skb->csum_start = skb_transport_header(skb) - skb->head; | 531 | skb->csum_start = skb_transport_header(skb) - skb->head; |
526 | skb->csum_offset = offsetof(struct tcphdr, check); | 532 | skb->csum_offset = offsetof(struct tcphdr, check); |
527 | } else { | 533 | } else { |
528 | th->check = tcp_v4_check(len, inet->saddr, inet->daddr, | 534 | th->check = tcp_v4_check(len, inet->inet_saddr, |
535 | inet->inet_daddr, | ||
529 | csum_partial(th, | 536 | csum_partial(th, |
530 | th->doff << 2, | 537 | th->doff << 2, |
531 | skb->csum)); | 538 | skb->csum)); |
@@ -741,8 +748,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
741 | * This still operates on a request_sock only, not on a big | 748 | * This still operates on a request_sock only, not on a big |
742 | * socket. | 749 | * socket. |
743 | */ | 750 | */ |
744 | static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | 751 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, |
745 | struct dst_entry *dst) | 752 | struct request_sock *req, |
753 | struct request_values *rvp) | ||
746 | { | 754 | { |
747 | const struct inet_request_sock *ireq = inet_rsk(req); | 755 | const struct inet_request_sock *ireq = inet_rsk(req); |
748 | int err = -1; | 756 | int err = -1; |
@@ -752,7 +760,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
752 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | 760 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
753 | return -1; | 761 | return -1; |
754 | 762 | ||
755 | skb = tcp_make_synack(sk, dst, req); | 763 | skb = tcp_make_synack(sk, dst, req, rvp); |
756 | 764 | ||
757 | if (skb) { | 765 | if (skb) { |
758 | struct tcphdr *th = tcp_hdr(skb); | 766 | struct tcphdr *th = tcp_hdr(skb); |
@@ -773,9 +781,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
773 | return err; | 781 | return err; |
774 | } | 782 | } |
775 | 783 | ||
776 | static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) | 784 | static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, |
785 | struct request_values *rvp) | ||
777 | { | 786 | { |
778 | return __tcp_v4_send_synack(sk, req, NULL); | 787 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); |
788 | return tcp_v4_send_synack(sk, NULL, req, rvp); | ||
779 | } | 789 | } |
780 | 790 | ||
781 | /* | 791 | /* |
@@ -848,7 +858,7 @@ static struct tcp_md5sig_key * | |||
848 | struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, | 858 | struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, |
849 | struct sock *addr_sk) | 859 | struct sock *addr_sk) |
850 | { | 860 | { |
851 | return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); | 861 | return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); |
852 | } | 862 | } |
853 | 863 | ||
854 | EXPORT_SYMBOL(tcp_v4_md5_lookup); | 864 | EXPORT_SYMBOL(tcp_v4_md5_lookup); |
@@ -923,7 +933,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add); | |||
923 | static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, | 933 | static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, |
924 | u8 *newkey, u8 newkeylen) | 934 | u8 *newkey, u8 newkeylen) |
925 | { | 935 | { |
926 | return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, | 936 | return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr, |
927 | newkey, newkeylen); | 937 | newkey, newkeylen); |
928 | } | 938 | } |
929 | 939 | ||
@@ -1089,8 +1099,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, | |||
1089 | __be32 saddr, daddr; | 1099 | __be32 saddr, daddr; |
1090 | 1100 | ||
1091 | if (sk) { | 1101 | if (sk) { |
1092 | saddr = inet_sk(sk)->saddr; | 1102 | saddr = inet_sk(sk)->inet_saddr; |
1093 | daddr = inet_sk(sk)->daddr; | 1103 | daddr = inet_sk(sk)->inet_daddr; |
1094 | } else if (req) { | 1104 | } else if (req) { |
1095 | saddr = inet_rsk(req)->loc_addr; | 1105 | saddr = inet_rsk(req)->loc_addr; |
1096 | daddr = inet_rsk(req)->rmt_addr; | 1106 | daddr = inet_rsk(req)->rmt_addr; |
@@ -1189,10 +1199,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) | |||
1189 | struct request_sock_ops tcp_request_sock_ops __read_mostly = { | 1199 | struct request_sock_ops tcp_request_sock_ops __read_mostly = { |
1190 | .family = PF_INET, | 1200 | .family = PF_INET, |
1191 | .obj_size = sizeof(struct tcp_request_sock), | 1201 | .obj_size = sizeof(struct tcp_request_sock), |
1192 | .rtx_syn_ack = tcp_v4_send_synack, | 1202 | .rtx_syn_ack = tcp_v4_rtx_synack, |
1193 | .send_ack = tcp_v4_reqsk_send_ack, | 1203 | .send_ack = tcp_v4_reqsk_send_ack, |
1194 | .destructor = tcp_v4_reqsk_destructor, | 1204 | .destructor = tcp_v4_reqsk_destructor, |
1195 | .send_reset = tcp_v4_send_reset, | 1205 | .send_reset = tcp_v4_send_reset, |
1206 | .syn_ack_timeout = tcp_syn_ack_timeout, | ||
1196 | }; | 1207 | }; |
1197 | 1208 | ||
1198 | #ifdef CONFIG_TCP_MD5SIG | 1209 | #ifdef CONFIG_TCP_MD5SIG |
@@ -1210,13 +1221,16 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { | |||
1210 | 1221 | ||
1211 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 1222 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
1212 | { | 1223 | { |
1213 | struct inet_request_sock *ireq; | 1224 | struct tcp_extend_values tmp_ext; |
1214 | struct tcp_options_received tmp_opt; | 1225 | struct tcp_options_received tmp_opt; |
1226 | u8 *hash_location; | ||
1215 | struct request_sock *req; | 1227 | struct request_sock *req; |
1228 | struct inet_request_sock *ireq; | ||
1229 | struct tcp_sock *tp = tcp_sk(sk); | ||
1230 | struct dst_entry *dst = NULL; | ||
1216 | __be32 saddr = ip_hdr(skb)->saddr; | 1231 | __be32 saddr = ip_hdr(skb)->saddr; |
1217 | __be32 daddr = ip_hdr(skb)->daddr; | 1232 | __be32 daddr = ip_hdr(skb)->daddr; |
1218 | __u32 isn = TCP_SKB_CB(skb)->when; | 1233 | __u32 isn = TCP_SKB_CB(skb)->when; |
1219 | struct dst_entry *dst = NULL; | ||
1220 | #ifdef CONFIG_SYN_COOKIES | 1234 | #ifdef CONFIG_SYN_COOKIES |
1221 | int want_cookie = 0; | 1235 | int want_cookie = 0; |
1222 | #else | 1236 | #else |
@@ -1257,16 +1271,50 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1257 | #endif | 1271 | #endif |
1258 | 1272 | ||
1259 | tcp_clear_options(&tmp_opt); | 1273 | tcp_clear_options(&tmp_opt); |
1260 | tmp_opt.mss_clamp = 536; | 1274 | tmp_opt.mss_clamp = TCP_MSS_DEFAULT; |
1261 | tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; | 1275 | tmp_opt.user_mss = tp->rx_opt.user_mss; |
1276 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0); | ||
1277 | |||
1278 | if (tmp_opt.cookie_plus > 0 && | ||
1279 | tmp_opt.saw_tstamp && | ||
1280 | !tp->rx_opt.cookie_out_never && | ||
1281 | (sysctl_tcp_cookie_size > 0 || | ||
1282 | (tp->cookie_values != NULL && | ||
1283 | tp->cookie_values->cookie_desired > 0))) { | ||
1284 | u8 *c; | ||
1285 | u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; | ||
1286 | int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; | ||
1287 | |||
1288 | if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) | ||
1289 | goto drop_and_release; | ||
1290 | |||
1291 | /* Secret recipe starts with IP addresses */ | ||
1292 | *mess++ ^= daddr; | ||
1293 | *mess++ ^= saddr; | ||
1262 | 1294 | ||
1263 | tcp_parse_options(skb, &tmp_opt, 0); | 1295 | /* plus variable length Initiator Cookie */ |
1296 | c = (u8 *)mess; | ||
1297 | while (l-- > 0) | ||
1298 | *c++ ^= *hash_location++; | ||
1299 | |||
1300 | #ifdef CONFIG_SYN_COOKIES | ||
1301 | want_cookie = 0; /* not our kind of cookie */ | ||
1302 | #endif | ||
1303 | tmp_ext.cookie_out_never = 0; /* false */ | ||
1304 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; | ||
1305 | } else if (!tp->rx_opt.cookie_in_always) { | ||
1306 | /* redundant indications, but ensure initialization. */ | ||
1307 | tmp_ext.cookie_out_never = 1; /* true */ | ||
1308 | tmp_ext.cookie_plus = 0; | ||
1309 | } else { | ||
1310 | goto drop_and_release; | ||
1311 | } | ||
1312 | tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; | ||
1264 | 1313 | ||
1265 | if (want_cookie && !tmp_opt.saw_tstamp) | 1314 | if (want_cookie && !tmp_opt.saw_tstamp) |
1266 | tcp_clear_options(&tmp_opt); | 1315 | tcp_clear_options(&tmp_opt); |
1267 | 1316 | ||
1268 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | 1317 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; |
1269 | |||
1270 | tcp_openreq_init(req, &tmp_opt, skb); | 1318 | tcp_openreq_init(req, &tmp_opt, skb); |
1271 | 1319 | ||
1272 | ireq = inet_rsk(req); | 1320 | ireq = inet_rsk(req); |
@@ -1304,7 +1352,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1304 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1352 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1305 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1353 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1306 | peer->v4daddr == saddr) { | 1354 | peer->v4daddr == saddr) { |
1307 | if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && | 1355 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1308 | (s32)(peer->tcp_ts - req->ts_recent) > | 1356 | (s32)(peer->tcp_ts - req->ts_recent) > |
1309 | TCP_PAWS_WINDOW) { | 1357 | TCP_PAWS_WINDOW) { |
1310 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | 1358 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); |
@@ -1333,7 +1381,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1333 | } | 1381 | } |
1334 | tcp_rsk(req)->snt_isn = isn; | 1382 | tcp_rsk(req)->snt_isn = isn; |
1335 | 1383 | ||
1336 | if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) | 1384 | if (tcp_v4_send_synack(sk, dst, req, |
1385 | (struct request_values *)&tmp_ext) || | ||
1386 | want_cookie) | ||
1337 | goto drop_and_free; | 1387 | goto drop_and_free; |
1338 | 1388 | ||
1339 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | 1389 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
@@ -1380,9 +1430,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1380 | newtp = tcp_sk(newsk); | 1430 | newtp = tcp_sk(newsk); |
1381 | newinet = inet_sk(newsk); | 1431 | newinet = inet_sk(newsk); |
1382 | ireq = inet_rsk(req); | 1432 | ireq = inet_rsk(req); |
1383 | newinet->daddr = ireq->rmt_addr; | 1433 | newinet->inet_daddr = ireq->rmt_addr; |
1384 | newinet->rcv_saddr = ireq->loc_addr; | 1434 | newinet->inet_rcv_saddr = ireq->loc_addr; |
1385 | newinet->saddr = ireq->loc_addr; | 1435 | newinet->inet_saddr = ireq->loc_addr; |
1386 | newinet->opt = ireq->opt; | 1436 | newinet->opt = ireq->opt; |
1387 | ireq->opt = NULL; | 1437 | ireq->opt = NULL; |
1388 | newinet->mc_index = inet_iif(skb); | 1438 | newinet->mc_index = inet_iif(skb); |
@@ -1390,7 +1440,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1390 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1440 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1391 | if (newinet->opt) | 1441 | if (newinet->opt) |
1392 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; | 1442 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; |
1393 | newinet->id = newtp->write_seq ^ jiffies; | 1443 | newinet->inet_id = newtp->write_seq ^ jiffies; |
1394 | 1444 | ||
1395 | tcp_mtup_init(newsk); | 1445 | tcp_mtup_init(newsk); |
1396 | tcp_sync_mss(newsk, dst_mtu(dst)); | 1446 | tcp_sync_mss(newsk, dst_mtu(dst)); |
@@ -1403,7 +1453,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1403 | 1453 | ||
1404 | #ifdef CONFIG_TCP_MD5SIG | 1454 | #ifdef CONFIG_TCP_MD5SIG |
1405 | /* Copy over the MD5 key from the original socket */ | 1455 | /* Copy over the MD5 key from the original socket */ |
1406 | if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { | 1456 | key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); |
1457 | if (key != NULL) { | ||
1407 | /* | 1458 | /* |
1408 | * We're using one, so create a matching key | 1459 | * We're using one, so create a matching key |
1409 | * on the newsk structure. If we fail to get | 1460 | * on the newsk structure. If we fail to get |
@@ -1412,13 +1463,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1412 | */ | 1463 | */ |
1413 | char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); | 1464 | char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); |
1414 | if (newkey != NULL) | 1465 | if (newkey != NULL) |
1415 | tcp_v4_md5_do_add(newsk, newinet->daddr, | 1466 | tcp_v4_md5_do_add(newsk, newinet->inet_daddr, |
1416 | newkey, key->keylen); | 1467 | newkey, key->keylen); |
1417 | newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1468 | newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
1418 | } | 1469 | } |
1419 | #endif | 1470 | #endif |
1420 | 1471 | ||
1421 | __inet_hash_nolisten(newsk); | 1472 | __inet_hash_nolisten(newsk, NULL); |
1422 | __inet_inherit_port(sk, newsk); | 1473 | __inet_inherit_port(sk, newsk); |
1423 | 1474 | ||
1424 | return newsk; | 1475 | return newsk; |
@@ -1610,6 +1661,11 @@ process: | |||
1610 | if (sk->sk_state == TCP_TIME_WAIT) | 1661 | if (sk->sk_state == TCP_TIME_WAIT) |
1611 | goto do_time_wait; | 1662 | goto do_time_wait; |
1612 | 1663 | ||
1664 | if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { | ||
1665 | NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); | ||
1666 | goto discard_and_relse; | ||
1667 | } | ||
1668 | |||
1613 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | 1669 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
1614 | goto discard_and_relse; | 1670 | goto discard_and_relse; |
1615 | nf_reset(skb); | 1671 | nf_reset(skb); |
@@ -1634,8 +1690,11 @@ process: | |||
1634 | if (!tcp_prequeue(sk, skb)) | 1690 | if (!tcp_prequeue(sk, skb)) |
1635 | ret = tcp_v4_do_rcv(sk, skb); | 1691 | ret = tcp_v4_do_rcv(sk, skb); |
1636 | } | 1692 | } |
1637 | } else | 1693 | } else if (unlikely(sk_add_backlog(sk, skb))) { |
1638 | sk_add_backlog(sk, skb); | 1694 | bh_unlock_sock(sk); |
1695 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | ||
1696 | goto discard_and_relse; | ||
1697 | } | ||
1639 | bh_unlock_sock(sk); | 1698 | bh_unlock_sock(sk); |
1640 | 1699 | ||
1641 | sock_put(sk); | 1700 | sock_put(sk); |
@@ -1711,8 +1770,8 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
1711 | struct inet_peer *peer = NULL; | 1770 | struct inet_peer *peer = NULL; |
1712 | int release_it = 0; | 1771 | int release_it = 0; |
1713 | 1772 | ||
1714 | if (!rt || rt->rt_dst != inet->daddr) { | 1773 | if (!rt || rt->rt_dst != inet->inet_daddr) { |
1715 | peer = inet_getpeer(inet->daddr, 1); | 1774 | peer = inet_getpeer(inet->inet_daddr, 1); |
1716 | release_it = 1; | 1775 | release_it = 1; |
1717 | } else { | 1776 | } else { |
1718 | if (!rt->peer) | 1777 | if (!rt->peer) |
@@ -1722,9 +1781,9 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
1722 | 1781 | ||
1723 | if (peer) { | 1782 | if (peer) { |
1724 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | 1783 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || |
1725 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && | 1784 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && |
1726 | peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { | 1785 | peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { |
1727 | peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; | 1786 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; |
1728 | peer->tcp_ts = tp->rx_opt.ts_recent; | 1787 | peer->tcp_ts = tp->rx_opt.ts_recent; |
1729 | } | 1788 | } |
1730 | if (release_it) | 1789 | if (release_it) |
@@ -1743,9 +1802,9 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | |||
1743 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 1802 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
1744 | 1803 | ||
1745 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | 1804 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || |
1746 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && | 1805 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && |
1747 | peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { | 1806 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { |
1748 | peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; | 1807 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; |
1749 | peer->tcp_ts = tcptw->tw_ts_recent; | 1808 | peer->tcp_ts = tcptw->tw_ts_recent; |
1750 | } | 1809 | } |
1751 | inet_putpeer(peer); | 1810 | inet_putpeer(peer); |
@@ -1810,7 +1869,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
1810 | */ | 1869 | */ |
1811 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | 1870 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
1812 | tp->snd_cwnd_clamp = ~0; | 1871 | tp->snd_cwnd_clamp = ~0; |
1813 | tp->mss_cache = 536; | 1872 | tp->mss_cache = TCP_MSS_DEFAULT; |
1814 | 1873 | ||
1815 | tp->reordering = sysctl_tcp_reordering; | 1874 | tp->reordering = sysctl_tcp_reordering; |
1816 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | 1875 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; |
@@ -1826,6 +1885,19 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
1826 | tp->af_specific = &tcp_sock_ipv4_specific; | 1885 | tp->af_specific = &tcp_sock_ipv4_specific; |
1827 | #endif | 1886 | #endif |
1828 | 1887 | ||
1888 | /* TCP Cookie Transactions */ | ||
1889 | if (sysctl_tcp_cookie_size > 0) { | ||
1890 | /* Default, cookies without s_data_payload. */ | ||
1891 | tp->cookie_values = | ||
1892 | kzalloc(sizeof(*tp->cookie_values), | ||
1893 | sk->sk_allocation); | ||
1894 | if (tp->cookie_values != NULL) | ||
1895 | kref_init(&tp->cookie_values->kref); | ||
1896 | } | ||
1897 | /* Presumed zeroed, in order of appearance: | ||
1898 | * cookie_in_always, cookie_out_never, | ||
1899 | * s_data_constant, s_data_in, s_data_out | ||
1900 | */ | ||
1829 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | 1901 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; |
1830 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | 1902 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; |
1831 | 1903 | ||
@@ -1879,6 +1951,13 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
1879 | sk->sk_sndmsg_page = NULL; | 1951 | sk->sk_sndmsg_page = NULL; |
1880 | } | 1952 | } |
1881 | 1953 | ||
1954 | /* TCP Cookie Transactions */ | ||
1955 | if (tp->cookie_values != NULL) { | ||
1956 | kref_put(&tp->cookie_values->kref, | ||
1957 | tcp_cookie_values_release); | ||
1958 | tp->cookie_values = NULL; | ||
1959 | } | ||
1960 | |||
1882 | percpu_counter_dec(&tcp_sockets_allocated); | 1961 | percpu_counter_dec(&tcp_sockets_allocated); |
1883 | } | 1962 | } |
1884 | 1963 | ||
@@ -2000,7 +2079,7 @@ static void *established_get_first(struct seq_file *seq) | |||
2000 | struct net *net = seq_file_net(seq); | 2079 | struct net *net = seq_file_net(seq); |
2001 | void *rc = NULL; | 2080 | void *rc = NULL; |
2002 | 2081 | ||
2003 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { | 2082 | for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { |
2004 | struct sock *sk; | 2083 | struct sock *sk; |
2005 | struct hlist_nulls_node *node; | 2084 | struct hlist_nulls_node *node; |
2006 | struct inet_timewait_sock *tw; | 2085 | struct inet_timewait_sock *tw; |
@@ -2061,10 +2140,10 @@ get_tw: | |||
2061 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2140 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2062 | 2141 | ||
2063 | /* Look for next non empty bucket */ | 2142 | /* Look for next non empty bucket */ |
2064 | while (++st->bucket < tcp_hashinfo.ehash_size && | 2143 | while (++st->bucket <= tcp_hashinfo.ehash_mask && |
2065 | empty_bucket(st)) | 2144 | empty_bucket(st)) |
2066 | ; | 2145 | ; |
2067 | if (st->bucket >= tcp_hashinfo.ehash_size) | 2146 | if (st->bucket > tcp_hashinfo.ehash_mask) |
2068 | return NULL; | 2147 | return NULL; |
2069 | 2148 | ||
2070 | spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2149 | spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
@@ -2225,7 +2304,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req, | |||
2225 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", | 2304 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", |
2226 | i, | 2305 | i, |
2227 | ireq->loc_addr, | 2306 | ireq->loc_addr, |
2228 | ntohs(inet_sk(sk)->sport), | 2307 | ntohs(inet_sk(sk)->inet_sport), |
2229 | ireq->rmt_addr, | 2308 | ireq->rmt_addr, |
2230 | ntohs(ireq->rmt_port), | 2309 | ntohs(ireq->rmt_port), |
2231 | TCP_SYN_RECV, | 2310 | TCP_SYN_RECV, |
@@ -2248,10 +2327,11 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) | |||
2248 | struct tcp_sock *tp = tcp_sk(sk); | 2327 | struct tcp_sock *tp = tcp_sk(sk); |
2249 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2328 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2250 | struct inet_sock *inet = inet_sk(sk); | 2329 | struct inet_sock *inet = inet_sk(sk); |
2251 | __be32 dest = inet->daddr; | 2330 | __be32 dest = inet->inet_daddr; |
2252 | __be32 src = inet->rcv_saddr; | 2331 | __be32 src = inet->inet_rcv_saddr; |
2253 | __u16 destp = ntohs(inet->dport); | 2332 | __u16 destp = ntohs(inet->inet_dport); |
2254 | __u16 srcp = ntohs(inet->sport); | 2333 | __u16 srcp = ntohs(inet->inet_sport); |
2334 | int rx_queue; | ||
2255 | 2335 | ||
2256 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { | 2336 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { |
2257 | timer_active = 1; | 2337 | timer_active = 1; |
@@ -2267,12 +2347,19 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) | |||
2267 | timer_expires = jiffies; | 2347 | timer_expires = jiffies; |
2268 | } | 2348 | } |
2269 | 2349 | ||
2350 | if (sk->sk_state == TCP_LISTEN) | ||
2351 | rx_queue = sk->sk_ack_backlog; | ||
2352 | else | ||
2353 | /* | ||
2354 | * because we dont lock socket, we might find a transient negative value | ||
2355 | */ | ||
2356 | rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); | ||
2357 | |||
2270 | seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " | 2358 | seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " |
2271 | "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", | 2359 | "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", |
2272 | i, src, srcp, dest, destp, sk->sk_state, | 2360 | i, src, srcp, dest, destp, sk->sk_state, |
2273 | tp->write_seq - tp->snd_una, | 2361 | tp->write_seq - tp->snd_una, |
2274 | sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : | 2362 | rx_queue, |
2275 | (tp->rcv_nxt - tp->copied_seq), | ||
2276 | timer_active, | 2363 | timer_active, |
2277 | jiffies_to_clock_t(timer_expires - jiffies), | 2364 | jiffies_to_clock_t(timer_expires - jiffies), |
2278 | icsk->icsk_retransmits, | 2365 | icsk->icsk_retransmits, |
@@ -2354,12 +2441,12 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = { | |||
2354 | }, | 2441 | }, |
2355 | }; | 2442 | }; |
2356 | 2443 | ||
2357 | static int tcp4_proc_init_net(struct net *net) | 2444 | static int __net_init tcp4_proc_init_net(struct net *net) |
2358 | { | 2445 | { |
2359 | return tcp_proc_register(net, &tcp4_seq_afinfo); | 2446 | return tcp_proc_register(net, &tcp4_seq_afinfo); |
2360 | } | 2447 | } |
2361 | 2448 | ||
2362 | static void tcp4_proc_exit_net(struct net *net) | 2449 | static void __net_exit tcp4_proc_exit_net(struct net *net) |
2363 | { | 2450 | { |
2364 | tcp_proc_unregister(net, &tcp4_seq_afinfo); | 2451 | tcp_proc_unregister(net, &tcp4_seq_afinfo); |
2365 | } | 2452 | } |
@@ -2463,12 +2550,17 @@ static int __net_init tcp_sk_init(struct net *net) | |||
2463 | static void __net_exit tcp_sk_exit(struct net *net) | 2550 | static void __net_exit tcp_sk_exit(struct net *net) |
2464 | { | 2551 | { |
2465 | inet_ctl_sock_destroy(net->ipv4.tcp_sock); | 2552 | inet_ctl_sock_destroy(net->ipv4.tcp_sock); |
2466 | inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); | 2553 | } |
2554 | |||
2555 | static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) | ||
2556 | { | ||
2557 | inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); | ||
2467 | } | 2558 | } |
2468 | 2559 | ||
2469 | static struct pernet_operations __net_initdata tcp_sk_ops = { | 2560 | static struct pernet_operations __net_initdata tcp_sk_ops = { |
2470 | .init = tcp_sk_init, | 2561 | .init = tcp_sk_init, |
2471 | .exit = tcp_sk_exit, | 2562 | .exit = tcp_sk_exit, |
2563 | .exit_batch = tcp_sk_exit_batch, | ||
2472 | }; | 2564 | }; |
2473 | 2565 | ||
2474 | void __init tcp_v4_init(void) | 2566 | void __init tcp_v4_init(void) |
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index ce3c41ff50b2..de870377fbba 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c | |||
@@ -143,8 +143,8 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) | |||
143 | goto out; | 143 | goto out; |
144 | 144 | ||
145 | /* we can't calc remote HZ with no different!! */ | 145 | /* we can't calc remote HZ with no different!! */ |
146 | if (tp->rx_opt.rcv_tsval == lp->remote_ref_time | 146 | if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || |
147 | || tp->rx_opt.rcv_tsecr == lp->local_ref_time) | 147 | tp->rx_opt.rcv_tsecr == lp->local_ref_time) |
148 | goto out; | 148 | goto out; |
149 | 149 | ||
150 | m = HZ * (tp->rx_opt.rcv_tsval - | 150 | m = HZ * (tp->rx_opt.rcv_tsval - |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c03598ed924..5fabff9ac6d6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -20,19 +20,14 @@ | |||
20 | 20 | ||
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/slab.h> | ||
23 | #include <linux/sysctl.h> | 24 | #include <linux/sysctl.h> |
24 | #include <linux/workqueue.h> | 25 | #include <linux/workqueue.h> |
25 | #include <net/tcp.h> | 26 | #include <net/tcp.h> |
26 | #include <net/inet_common.h> | 27 | #include <net/inet_common.h> |
27 | #include <net/xfrm.h> | 28 | #include <net/xfrm.h> |
28 | 29 | ||
29 | #ifdef CONFIG_SYSCTL | 30 | int sysctl_tcp_syncookies __read_mostly = 1; |
30 | #define SYNC_INIT 0 /* let the user enable it */ | ||
31 | #else | ||
32 | #define SYNC_INIT 1 | ||
33 | #endif | ||
34 | |||
35 | int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; | ||
36 | EXPORT_SYMBOL(sysctl_tcp_syncookies); | 31 | EXPORT_SYMBOL(sysctl_tcp_syncookies); |
37 | 32 | ||
38 | int sysctl_tcp_abort_on_overflow __read_mostly; | 33 | int sysctl_tcp_abort_on_overflow __read_mostly; |
@@ -96,13 +91,14 @@ enum tcp_tw_status | |||
96 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | 91 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, |
97 | const struct tcphdr *th) | 92 | const struct tcphdr *th) |
98 | { | 93 | { |
99 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | ||
100 | struct tcp_options_received tmp_opt; | 94 | struct tcp_options_received tmp_opt; |
95 | u8 *hash_location; | ||
96 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | ||
101 | int paws_reject = 0; | 97 | int paws_reject = 0; |
102 | 98 | ||
103 | tmp_opt.saw_tstamp = 0; | 99 | tmp_opt.saw_tstamp = 0; |
104 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { | 100 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { |
105 | tcp_parse_options(skb, &tmp_opt, 0); | 101 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0); |
106 | 102 | ||
107 | if (tmp_opt.saw_tstamp) { | 103 | if (tmp_opt.saw_tstamp) { |
108 | tmp_opt.ts_recent = tcptw->tw_ts_recent; | 104 | tmp_opt.ts_recent = tcptw->tw_ts_recent; |
@@ -389,14 +385,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
389 | const struct inet_request_sock *ireq = inet_rsk(req); | 385 | const struct inet_request_sock *ireq = inet_rsk(req); |
390 | struct tcp_request_sock *treq = tcp_rsk(req); | 386 | struct tcp_request_sock *treq = tcp_rsk(req); |
391 | struct inet_connection_sock *newicsk = inet_csk(newsk); | 387 | struct inet_connection_sock *newicsk = inet_csk(newsk); |
392 | struct tcp_sock *newtp; | 388 | struct tcp_sock *newtp = tcp_sk(newsk); |
389 | struct tcp_sock *oldtp = tcp_sk(sk); | ||
390 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; | ||
391 | |||
392 | /* TCP Cookie Transactions require space for the cookie pair, | ||
393 | * as it differs for each connection. There is no need to | ||
394 | * copy any s_data_payload stored at the original socket. | ||
395 | * Failure will prevent resuming the connection. | ||
396 | * | ||
397 | * Presumed copied, in order of appearance: | ||
398 | * cookie_in_always, cookie_out_never | ||
399 | */ | ||
400 | if (oldcvp != NULL) { | ||
401 | struct tcp_cookie_values *newcvp = | ||
402 | kzalloc(sizeof(*newtp->cookie_values), | ||
403 | GFP_ATOMIC); | ||
404 | |||
405 | if (newcvp != NULL) { | ||
406 | kref_init(&newcvp->kref); | ||
407 | newcvp->cookie_desired = | ||
408 | oldcvp->cookie_desired; | ||
409 | newtp->cookie_values = newcvp; | ||
410 | } else { | ||
411 | /* Not Yet Implemented */ | ||
412 | newtp->cookie_values = NULL; | ||
413 | } | ||
414 | } | ||
393 | 415 | ||
394 | /* Now setup tcp_sock */ | 416 | /* Now setup tcp_sock */ |
395 | newtp = tcp_sk(newsk); | ||
396 | newtp->pred_flags = 0; | 417 | newtp->pred_flags = 0; |
397 | newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; | 418 | |
398 | newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; | 419 | newtp->rcv_wup = newtp->copied_seq = |
399 | newtp->snd_up = treq->snt_isn + 1; | 420 | newtp->rcv_nxt = treq->rcv_isn + 1; |
421 | |||
422 | newtp->snd_sml = newtp->snd_una = | ||
423 | newtp->snd_nxt = newtp->snd_up = | ||
424 | treq->snt_isn + 1 + tcp_s_data_size(oldtp); | ||
400 | 425 | ||
401 | tcp_prequeue_init(newtp); | 426 | tcp_prequeue_init(newtp); |
402 | 427 | ||
@@ -429,8 +454,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
429 | tcp_set_ca_state(newsk, TCP_CA_Open); | 454 | tcp_set_ca_state(newsk, TCP_CA_Open); |
430 | tcp_init_xmit_timers(newsk); | 455 | tcp_init_xmit_timers(newsk); |
431 | skb_queue_head_init(&newtp->out_of_order_queue); | 456 | skb_queue_head_init(&newtp->out_of_order_queue); |
432 | newtp->write_seq = treq->snt_isn + 1; | 457 | newtp->write_seq = newtp->pushed_seq = |
433 | newtp->pushed_seq = newtp->write_seq; | 458 | treq->snt_isn + 1 + tcp_s_data_size(oldtp); |
434 | 459 | ||
435 | newtp->rx_opt.saw_tstamp = 0; | 460 | newtp->rx_opt.saw_tstamp = 0; |
436 | 461 | ||
@@ -476,7 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
476 | if (newtp->af_specific->md5_lookup(sk, newsk)) | 501 | if (newtp->af_specific->md5_lookup(sk, newsk)) |
477 | newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; | 502 | newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; |
478 | #endif | 503 | #endif |
479 | if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) | 504 | if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) |
480 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; | 505 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; |
481 | newtp->rx_opt.mss_clamp = req->mss; | 506 | newtp->rx_opt.mss_clamp = req->mss; |
482 | TCP_ECN_openreq_child(newtp, req); | 507 | TCP_ECN_openreq_child(newtp, req); |
@@ -495,15 +520,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
495 | struct request_sock *req, | 520 | struct request_sock *req, |
496 | struct request_sock **prev) | 521 | struct request_sock **prev) |
497 | { | 522 | { |
523 | struct tcp_options_received tmp_opt; | ||
524 | u8 *hash_location; | ||
525 | struct sock *child; | ||
498 | const struct tcphdr *th = tcp_hdr(skb); | 526 | const struct tcphdr *th = tcp_hdr(skb); |
499 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 527 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
500 | int paws_reject = 0; | 528 | int paws_reject = 0; |
501 | struct tcp_options_received tmp_opt; | ||
502 | struct sock *child; | ||
503 | 529 | ||
504 | tmp_opt.saw_tstamp = 0; | 530 | tmp_opt.saw_tstamp = 0; |
505 | if (th->doff > (sizeof(struct tcphdr)>>2)) { | 531 | if (th->doff > (sizeof(struct tcphdr)>>2)) { |
506 | tcp_parse_options(skb, &tmp_opt, 0); | 532 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0); |
507 | 533 | ||
508 | if (tmp_opt.saw_tstamp) { | 534 | if (tmp_opt.saw_tstamp) { |
509 | tmp_opt.ts_recent = req->ts_recent; | 535 | tmp_opt.ts_recent = req->ts_recent; |
@@ -537,7 +563,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
537 | * Enforce "SYN-ACK" according to figure 8, figure 6 | 563 | * Enforce "SYN-ACK" according to figure 8, figure 6 |
538 | * of RFC793, fixed by RFC1122. | 564 | * of RFC793, fixed by RFC1122. |
539 | */ | 565 | */ |
540 | req->rsk_ops->rtx_syn_ack(sk, req); | 566 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); |
541 | return NULL; | 567 | return NULL; |
542 | } | 568 | } |
543 | 569 | ||
@@ -596,7 +622,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
596 | * Invalid ACK: reset will be sent by listening socket | 622 | * Invalid ACK: reset will be sent by listening socket |
597 | */ | 623 | */ |
598 | if ((flg & TCP_FLAG_ACK) && | 624 | if ((flg & TCP_FLAG_ACK) && |
599 | (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1)) | 625 | (TCP_SKB_CB(skb)->ack_seq != |
626 | tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) | ||
600 | return sk; | 627 | return sk; |
601 | 628 | ||
602 | /* Also, it would be not so bad idea to check rcv_tsecr, which | 629 | /* Also, it would be not so bad idea to check rcv_tsecr, which |
@@ -702,7 +729,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, | |||
702 | * in main socket hash table and lock on listening | 729 | * in main socket hash table and lock on listening |
703 | * socket does not protect us more. | 730 | * socket does not protect us more. |
704 | */ | 731 | */ |
705 | sk_add_backlog(child, skb); | 732 | __sk_add_backlog(child, skb); |
706 | } | 733 | } |
707 | 734 | ||
708 | bh_unlock_sock(child); | 735 | bh_unlock_sock(child); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fcd278a7080e..0dda86e72ad8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <net/tcp.h> | 37 | #include <net/tcp.h> |
38 | 38 | ||
39 | #include <linux/compiler.h> | 39 | #include <linux/compiler.h> |
40 | #include <linux/gfp.h> | ||
40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
41 | 42 | ||
42 | /* People can turn this off for buggy TCP's found in printers etc. */ | 43 | /* People can turn this off for buggy TCP's found in printers etc. */ |
@@ -59,6 +60,10 @@ int sysctl_tcp_base_mss __read_mostly = 512; | |||
59 | /* By default, RFC2861 behavior. */ | 60 | /* By default, RFC2861 behavior. */ |
60 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 61 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
61 | 62 | ||
63 | int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ | ||
64 | EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); | ||
65 | |||
66 | |||
62 | /* Account for new data that has been sent to the network. */ | 67 | /* Account for new data that has been sent to the network. */ |
63 | static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) | 68 | static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) |
64 | { | 69 | { |
@@ -179,7 +184,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) | |||
179 | */ | 184 | */ |
180 | void tcp_select_initial_window(int __space, __u32 mss, | 185 | void tcp_select_initial_window(int __space, __u32 mss, |
181 | __u32 *rcv_wnd, __u32 *window_clamp, | 186 | __u32 *rcv_wnd, __u32 *window_clamp, |
182 | int wscale_ok, __u8 *rcv_wscale) | 187 | int wscale_ok, __u8 *rcv_wscale, |
188 | __u32 init_rcv_wnd) | ||
183 | { | 189 | { |
184 | unsigned int space = (__space < 0 ? 0 : __space); | 190 | unsigned int space = (__space < 0 ? 0 : __space); |
185 | 191 | ||
@@ -228,7 +234,13 @@ void tcp_select_initial_window(int __space, __u32 mss, | |||
228 | init_cwnd = 2; | 234 | init_cwnd = 2; |
229 | else if (mss > 1460) | 235 | else if (mss > 1460) |
230 | init_cwnd = 3; | 236 | init_cwnd = 3; |
231 | if (*rcv_wnd > init_cwnd * mss) | 237 | /* when initializing use the value from init_rcv_wnd |
238 | * rather than the default from above | ||
239 | */ | ||
240 | if (init_rcv_wnd && | ||
241 | (*rcv_wnd > init_rcv_wnd * mss)) | ||
242 | *rcv_wnd = init_rcv_wnd * mss; | ||
243 | else if (*rcv_wnd > init_cwnd * mss) | ||
232 | *rcv_wnd = init_cwnd * mss; | 244 | *rcv_wnd = init_cwnd * mss; |
233 | } | 245 | } |
234 | 246 | ||
@@ -362,15 +374,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp) | |||
362 | #define OPTION_TS (1 << 1) | 374 | #define OPTION_TS (1 << 1) |
363 | #define OPTION_MD5 (1 << 2) | 375 | #define OPTION_MD5 (1 << 2) |
364 | #define OPTION_WSCALE (1 << 3) | 376 | #define OPTION_WSCALE (1 << 3) |
377 | #define OPTION_COOKIE_EXTENSION (1 << 4) | ||
365 | 378 | ||
366 | struct tcp_out_options { | 379 | struct tcp_out_options { |
367 | u8 options; /* bit field of OPTION_* */ | 380 | u8 options; /* bit field of OPTION_* */ |
368 | u8 ws; /* window scale, 0 to disable */ | 381 | u8 ws; /* window scale, 0 to disable */ |
369 | u8 num_sack_blocks; /* number of SACK blocks to include */ | 382 | u8 num_sack_blocks; /* number of SACK blocks to include */ |
383 | u8 hash_size; /* bytes in hash_location */ | ||
370 | u16 mss; /* 0 to disable */ | 384 | u16 mss; /* 0 to disable */ |
371 | __u32 tsval, tsecr; /* need to include OPTION_TS */ | 385 | __u32 tsval, tsecr; /* need to include OPTION_TS */ |
386 | __u8 *hash_location; /* temporary pointer, overloaded */ | ||
372 | }; | 387 | }; |
373 | 388 | ||
389 | /* The sysctl int routines are generic, so check consistency here. | ||
390 | */ | ||
391 | static u8 tcp_cookie_size_check(u8 desired) | ||
392 | { | ||
393 | if (desired > 0) { | ||
394 | /* previously specified */ | ||
395 | return desired; | ||
396 | } | ||
397 | if (sysctl_tcp_cookie_size <= 0) { | ||
398 | /* no default specified */ | ||
399 | return 0; | ||
400 | } | ||
401 | if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { | ||
402 | /* value too small, specify minimum */ | ||
403 | return TCP_COOKIE_MIN; | ||
404 | } | ||
405 | if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { | ||
406 | /* value too large, specify maximum */ | ||
407 | return TCP_COOKIE_MAX; | ||
408 | } | ||
409 | if (0x1 & sysctl_tcp_cookie_size) { | ||
410 | /* 8-bit multiple, illegal, fix it */ | ||
411 | return (u8)(sysctl_tcp_cookie_size + 0x1); | ||
412 | } | ||
413 | return (u8)sysctl_tcp_cookie_size; | ||
414 | } | ||
415 | |||
374 | /* Write previously computed TCP options to the packet. | 416 | /* Write previously computed TCP options to the packet. |
375 | * | 417 | * |
376 | * Beware: Something in the Internet is very sensitive to the ordering of | 418 | * Beware: Something in the Internet is very sensitive to the ordering of |
@@ -385,17 +427,34 @@ struct tcp_out_options { | |||
385 | * (but it may well be that other scenarios fail similarly). | 427 | * (but it may well be that other scenarios fail similarly). |
386 | */ | 428 | */ |
387 | static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | 429 | static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, |
388 | const struct tcp_out_options *opts, | 430 | struct tcp_out_options *opts) |
389 | __u8 **md5_hash) { | 431 | { |
390 | if (unlikely(OPTION_MD5 & opts->options)) { | 432 | u8 options = opts->options; /* mungable copy */ |
391 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 433 | |
392 | (TCPOPT_NOP << 16) | | 434 | /* Having both authentication and cookies for security is redundant, |
393 | (TCPOPT_MD5SIG << 8) | | 435 | * and there's certainly not enough room. Instead, the cookie-less |
394 | TCPOLEN_MD5SIG); | 436 | * extension variant is proposed. |
395 | *md5_hash = (__u8 *)ptr; | 437 | * |
438 | * Consider the pessimal case with authentication. The options | ||
439 | * could look like: | ||
440 | * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 | ||
441 | */ | ||
442 | if (unlikely(OPTION_MD5 & options)) { | ||
443 | if (unlikely(OPTION_COOKIE_EXTENSION & options)) { | ||
444 | *ptr++ = htonl((TCPOPT_COOKIE << 24) | | ||
445 | (TCPOLEN_COOKIE_BASE << 16) | | ||
446 | (TCPOPT_MD5SIG << 8) | | ||
447 | TCPOLEN_MD5SIG); | ||
448 | } else { | ||
449 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
450 | (TCPOPT_NOP << 16) | | ||
451 | (TCPOPT_MD5SIG << 8) | | ||
452 | TCPOLEN_MD5SIG); | ||
453 | } | ||
454 | options &= ~OPTION_COOKIE_EXTENSION; | ||
455 | /* overload cookie hash location */ | ||
456 | opts->hash_location = (__u8 *)ptr; | ||
396 | ptr += 4; | 457 | ptr += 4; |
397 | } else { | ||
398 | *md5_hash = NULL; | ||
399 | } | 458 | } |
400 | 459 | ||
401 | if (unlikely(opts->mss)) { | 460 | if (unlikely(opts->mss)) { |
@@ -404,12 +463,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
404 | opts->mss); | 463 | opts->mss); |
405 | } | 464 | } |
406 | 465 | ||
407 | if (likely(OPTION_TS & opts->options)) { | 466 | if (likely(OPTION_TS & options)) { |
408 | if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { | 467 | if (unlikely(OPTION_SACK_ADVERTISE & options)) { |
409 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | 468 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | |
410 | (TCPOLEN_SACK_PERM << 16) | | 469 | (TCPOLEN_SACK_PERM << 16) | |
411 | (TCPOPT_TIMESTAMP << 8) | | 470 | (TCPOPT_TIMESTAMP << 8) | |
412 | TCPOLEN_TIMESTAMP); | 471 | TCPOLEN_TIMESTAMP); |
472 | options &= ~OPTION_SACK_ADVERTISE; | ||
413 | } else { | 473 | } else { |
414 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 474 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
415 | (TCPOPT_NOP << 16) | | 475 | (TCPOPT_NOP << 16) | |
@@ -420,15 +480,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
420 | *ptr++ = htonl(opts->tsecr); | 480 | *ptr++ = htonl(opts->tsecr); |
421 | } | 481 | } |
422 | 482 | ||
423 | if (unlikely(OPTION_SACK_ADVERTISE & opts->options && | 483 | /* Specification requires after timestamp, so do it now. |
424 | !(OPTION_TS & opts->options))) { | 484 | * |
485 | * Consider the pessimal case without authentication. The options | ||
486 | * could look like: | ||
487 | * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 | ||
488 | */ | ||
489 | if (unlikely(OPTION_COOKIE_EXTENSION & options)) { | ||
490 | __u8 *cookie_copy = opts->hash_location; | ||
491 | u8 cookie_size = opts->hash_size; | ||
492 | |||
493 | /* 8-bit multiple handled in tcp_cookie_size_check() above, | ||
494 | * and elsewhere. | ||
495 | */ | ||
496 | if (0x2 & cookie_size) { | ||
497 | __u8 *p = (__u8 *)ptr; | ||
498 | |||
499 | /* 16-bit multiple */ | ||
500 | *p++ = TCPOPT_COOKIE; | ||
501 | *p++ = TCPOLEN_COOKIE_BASE + cookie_size; | ||
502 | *p++ = *cookie_copy++; | ||
503 | *p++ = *cookie_copy++; | ||
504 | ptr++; | ||
505 | cookie_size -= 2; | ||
506 | } else { | ||
507 | /* 32-bit multiple */ | ||
508 | *ptr++ = htonl(((TCPOPT_NOP << 24) | | ||
509 | (TCPOPT_NOP << 16) | | ||
510 | (TCPOPT_COOKIE << 8) | | ||
511 | TCPOLEN_COOKIE_BASE) + | ||
512 | cookie_size); | ||
513 | } | ||
514 | |||
515 | if (cookie_size > 0) { | ||
516 | memcpy(ptr, cookie_copy, cookie_size); | ||
517 | ptr += (cookie_size / 4); | ||
518 | } | ||
519 | } | ||
520 | |||
521 | if (unlikely(OPTION_SACK_ADVERTISE & options)) { | ||
425 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 522 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
426 | (TCPOPT_NOP << 16) | | 523 | (TCPOPT_NOP << 16) | |
427 | (TCPOPT_SACK_PERM << 8) | | 524 | (TCPOPT_SACK_PERM << 8) | |
428 | TCPOLEN_SACK_PERM); | 525 | TCPOLEN_SACK_PERM); |
429 | } | 526 | } |
430 | 527 | ||
431 | if (unlikely(OPTION_WSCALE & opts->options)) { | 528 | if (unlikely(OPTION_WSCALE & options)) { |
432 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 529 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
433 | (TCPOPT_WINDOW << 16) | | 530 | (TCPOPT_WINDOW << 16) | |
434 | (TCPOLEN_WINDOW << 8) | | 531 | (TCPOLEN_WINDOW << 8) | |
@@ -463,13 +560,17 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
463 | struct tcp_out_options *opts, | 560 | struct tcp_out_options *opts, |
464 | struct tcp_md5sig_key **md5) { | 561 | struct tcp_md5sig_key **md5) { |
465 | struct tcp_sock *tp = tcp_sk(sk); | 562 | struct tcp_sock *tp = tcp_sk(sk); |
466 | unsigned size = 0; | 563 | struct tcp_cookie_values *cvp = tp->cookie_values; |
564 | unsigned remaining = MAX_TCP_OPTION_SPACE; | ||
565 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? | ||
566 | tcp_cookie_size_check(cvp->cookie_desired) : | ||
567 | 0; | ||
467 | 568 | ||
468 | #ifdef CONFIG_TCP_MD5SIG | 569 | #ifdef CONFIG_TCP_MD5SIG |
469 | *md5 = tp->af_specific->md5_lookup(sk, sk); | 570 | *md5 = tp->af_specific->md5_lookup(sk, sk); |
470 | if (*md5) { | 571 | if (*md5) { |
471 | opts->options |= OPTION_MD5; | 572 | opts->options |= OPTION_MD5; |
472 | size += TCPOLEN_MD5SIG_ALIGNED; | 573 | remaining -= TCPOLEN_MD5SIG_ALIGNED; |
473 | } | 574 | } |
474 | #else | 575 | #else |
475 | *md5 = NULL; | 576 | *md5 = NULL; |
@@ -485,26 +586,72 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
485 | * SACKs don't matter, we never delay an ACK when we have any of those | 586 | * SACKs don't matter, we never delay an ACK when we have any of those |
486 | * going out. */ | 587 | * going out. */ |
487 | opts->mss = tcp_advertise_mss(sk); | 588 | opts->mss = tcp_advertise_mss(sk); |
488 | size += TCPOLEN_MSS_ALIGNED; | 589 | remaining -= TCPOLEN_MSS_ALIGNED; |
489 | 590 | ||
490 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { | 591 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { |
491 | opts->options |= OPTION_TS; | 592 | opts->options |= OPTION_TS; |
492 | opts->tsval = TCP_SKB_CB(skb)->when; | 593 | opts->tsval = TCP_SKB_CB(skb)->when; |
493 | opts->tsecr = tp->rx_opt.ts_recent; | 594 | opts->tsecr = tp->rx_opt.ts_recent; |
494 | size += TCPOLEN_TSTAMP_ALIGNED; | 595 | remaining -= TCPOLEN_TSTAMP_ALIGNED; |
495 | } | 596 | } |
496 | if (likely(sysctl_tcp_window_scaling)) { | 597 | if (likely(sysctl_tcp_window_scaling)) { |
497 | opts->ws = tp->rx_opt.rcv_wscale; | 598 | opts->ws = tp->rx_opt.rcv_wscale; |
498 | opts->options |= OPTION_WSCALE; | 599 | opts->options |= OPTION_WSCALE; |
499 | size += TCPOLEN_WSCALE_ALIGNED; | 600 | remaining -= TCPOLEN_WSCALE_ALIGNED; |
500 | } | 601 | } |
501 | if (likely(sysctl_tcp_sack)) { | 602 | if (likely(sysctl_tcp_sack)) { |
502 | opts->options |= OPTION_SACK_ADVERTISE; | 603 | opts->options |= OPTION_SACK_ADVERTISE; |
503 | if (unlikely(!(OPTION_TS & opts->options))) | 604 | if (unlikely(!(OPTION_TS & opts->options))) |
504 | size += TCPOLEN_SACKPERM_ALIGNED; | 605 | remaining -= TCPOLEN_SACKPERM_ALIGNED; |
505 | } | 606 | } |
506 | 607 | ||
507 | return size; | 608 | /* Note that timestamps are required by the specification. |
609 | * | ||
610 | * Odd numbers of bytes are prohibited by the specification, ensuring | ||
611 | * that the cookie is 16-bit aligned, and the resulting cookie pair is | ||
612 | * 32-bit aligned. | ||
613 | */ | ||
614 | if (*md5 == NULL && | ||
615 | (OPTION_TS & opts->options) && | ||
616 | cookie_size > 0) { | ||
617 | int need = TCPOLEN_COOKIE_BASE + cookie_size; | ||
618 | |||
619 | if (0x2 & need) { | ||
620 | /* 32-bit multiple */ | ||
621 | need += 2; /* NOPs */ | ||
622 | |||
623 | if (need > remaining) { | ||
624 | /* try shrinking cookie to fit */ | ||
625 | cookie_size -= 2; | ||
626 | need -= 4; | ||
627 | } | ||
628 | } | ||
629 | while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { | ||
630 | cookie_size -= 4; | ||
631 | need -= 4; | ||
632 | } | ||
633 | if (TCP_COOKIE_MIN <= cookie_size) { | ||
634 | opts->options |= OPTION_COOKIE_EXTENSION; | ||
635 | opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; | ||
636 | opts->hash_size = cookie_size; | ||
637 | |||
638 | /* Remember for future incarnations. */ | ||
639 | cvp->cookie_desired = cookie_size; | ||
640 | |||
641 | if (cvp->cookie_desired != cvp->cookie_pair_size) { | ||
642 | /* Currently use random bytes as a nonce, | ||
643 | * assuming these are completely unpredictable | ||
644 | * by hostile users of the same system. | ||
645 | */ | ||
646 | get_random_bytes(&cvp->cookie_pair[0], | ||
647 | cookie_size); | ||
648 | cvp->cookie_pair_size = cookie_size; | ||
649 | } | ||
650 | |||
651 | remaining -= need; | ||
652 | } | ||
653 | } | ||
654 | return MAX_TCP_OPTION_SPACE - remaining; | ||
508 | } | 655 | } |
509 | 656 | ||
510 | /* Set up TCP options for SYN-ACKs. */ | 657 | /* Set up TCP options for SYN-ACKs. */ |
@@ -512,48 +659,77 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
512 | struct request_sock *req, | 659 | struct request_sock *req, |
513 | unsigned mss, struct sk_buff *skb, | 660 | unsigned mss, struct sk_buff *skb, |
514 | struct tcp_out_options *opts, | 661 | struct tcp_out_options *opts, |
515 | struct tcp_md5sig_key **md5) { | 662 | struct tcp_md5sig_key **md5, |
516 | unsigned size = 0; | 663 | struct tcp_extend_values *xvp) |
664 | { | ||
517 | struct inet_request_sock *ireq = inet_rsk(req); | 665 | struct inet_request_sock *ireq = inet_rsk(req); |
518 | char doing_ts; | 666 | unsigned remaining = MAX_TCP_OPTION_SPACE; |
667 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | ||
668 | xvp->cookie_plus : | ||
669 | 0; | ||
670 | bool doing_ts = ireq->tstamp_ok; | ||
519 | 671 | ||
520 | #ifdef CONFIG_TCP_MD5SIG | 672 | #ifdef CONFIG_TCP_MD5SIG |
521 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); | 673 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); |
522 | if (*md5) { | 674 | if (*md5) { |
523 | opts->options |= OPTION_MD5; | 675 | opts->options |= OPTION_MD5; |
524 | size += TCPOLEN_MD5SIG_ALIGNED; | 676 | remaining -= TCPOLEN_MD5SIG_ALIGNED; |
677 | |||
678 | /* We can't fit any SACK blocks in a packet with MD5 + TS | ||
679 | * options. There was discussion about disabling SACK | ||
680 | * rather than TS in order to fit in better with old, | ||
681 | * buggy kernels, but that was deemed to be unnecessary. | ||
682 | */ | ||
683 | doing_ts &= !ireq->sack_ok; | ||
525 | } | 684 | } |
526 | #else | 685 | #else |
527 | *md5 = NULL; | 686 | *md5 = NULL; |
528 | #endif | 687 | #endif |
529 | 688 | ||
530 | /* we can't fit any SACK blocks in a packet with MD5 + TS | 689 | /* We always send an MSS option. */ |
531 | options. There was discussion about disabling SACK rather than TS in | ||
532 | order to fit in better with old, buggy kernels, but that was deemed | ||
533 | to be unnecessary. */ | ||
534 | doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); | ||
535 | |||
536 | opts->mss = mss; | 690 | opts->mss = mss; |
537 | size += TCPOLEN_MSS_ALIGNED; | 691 | remaining -= TCPOLEN_MSS_ALIGNED; |
538 | 692 | ||
539 | if (likely(ireq->wscale_ok)) { | 693 | if (likely(ireq->wscale_ok)) { |
540 | opts->ws = ireq->rcv_wscale; | 694 | opts->ws = ireq->rcv_wscale; |
541 | opts->options |= OPTION_WSCALE; | 695 | opts->options |= OPTION_WSCALE; |
542 | size += TCPOLEN_WSCALE_ALIGNED; | 696 | remaining -= TCPOLEN_WSCALE_ALIGNED; |
543 | } | 697 | } |
544 | if (likely(doing_ts)) { | 698 | if (likely(doing_ts)) { |
545 | opts->options |= OPTION_TS; | 699 | opts->options |= OPTION_TS; |
546 | opts->tsval = TCP_SKB_CB(skb)->when; | 700 | opts->tsval = TCP_SKB_CB(skb)->when; |
547 | opts->tsecr = req->ts_recent; | 701 | opts->tsecr = req->ts_recent; |
548 | size += TCPOLEN_TSTAMP_ALIGNED; | 702 | remaining -= TCPOLEN_TSTAMP_ALIGNED; |
549 | } | 703 | } |
550 | if (likely(ireq->sack_ok)) { | 704 | if (likely(ireq->sack_ok)) { |
551 | opts->options |= OPTION_SACK_ADVERTISE; | 705 | opts->options |= OPTION_SACK_ADVERTISE; |
552 | if (unlikely(!doing_ts)) | 706 | if (unlikely(!doing_ts)) |
553 | size += TCPOLEN_SACKPERM_ALIGNED; | 707 | remaining -= TCPOLEN_SACKPERM_ALIGNED; |
554 | } | 708 | } |
555 | 709 | ||
556 | return size; | 710 | /* Similar rationale to tcp_syn_options() applies here, too. |
711 | * If the <SYN> options fit, the same options should fit now! | ||
712 | */ | ||
713 | if (*md5 == NULL && | ||
714 | doing_ts && | ||
715 | cookie_plus > TCPOLEN_COOKIE_BASE) { | ||
716 | int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ | ||
717 | |||
718 | if (0x2 & need) { | ||
719 | /* 32-bit multiple */ | ||
720 | need += 2; /* NOPs */ | ||
721 | } | ||
722 | if (need <= remaining) { | ||
723 | opts->options |= OPTION_COOKIE_EXTENSION; | ||
724 | opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; | ||
725 | remaining -= need; | ||
726 | } else { | ||
727 | /* There's no error return, so flag it. */ | ||
728 | xvp->cookie_out_never = 1; /* true */ | ||
729 | opts->hash_size = 0; | ||
730 | } | ||
731 | } | ||
732 | return MAX_TCP_OPTION_SPACE - remaining; | ||
557 | } | 733 | } |
558 | 734 | ||
559 | /* Compute TCP options for ESTABLISHED sockets. This is not the | 735 | /* Compute TCP options for ESTABLISHED sockets. This is not the |
@@ -619,7 +795,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
619 | struct tcp_out_options opts; | 795 | struct tcp_out_options opts; |
620 | unsigned tcp_options_size, tcp_header_size; | 796 | unsigned tcp_options_size, tcp_header_size; |
621 | struct tcp_md5sig_key *md5; | 797 | struct tcp_md5sig_key *md5; |
622 | __u8 *md5_hash_location; | ||
623 | struct tcphdr *th; | 798 | struct tcphdr *th; |
624 | int err; | 799 | int err; |
625 | 800 | ||
@@ -661,8 +836,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
661 | 836 | ||
662 | /* Build TCP header and checksum it. */ | 837 | /* Build TCP header and checksum it. */ |
663 | th = tcp_hdr(skb); | 838 | th = tcp_hdr(skb); |
664 | th->source = inet->sport; | 839 | th->source = inet->inet_sport; |
665 | th->dest = inet->dport; | 840 | th->dest = inet->inet_dport; |
666 | th->seq = htonl(tcb->seq); | 841 | th->seq = htonl(tcb->seq); |
667 | th->ack_seq = htonl(tp->rcv_nxt); | 842 | th->ack_seq = htonl(tp->rcv_nxt); |
668 | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | | 843 | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | |
@@ -690,7 +865,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
690 | } | 865 | } |
691 | } | 866 | } |
692 | 867 | ||
693 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); | 868 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
694 | if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) | 869 | if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) |
695 | TCP_ECN_send(sk, skb, tcp_header_size); | 870 | TCP_ECN_send(sk, skb, tcp_header_size); |
696 | 871 | ||
@@ -698,7 +873,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
698 | /* Calculate the MD5 hash, as we have all we need now */ | 873 | /* Calculate the MD5 hash, as we have all we need now */ |
699 | if (md5) { | 874 | if (md5) { |
700 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 875 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
701 | tp->af_specific->calc_md5_hash(md5_hash_location, | 876 | tp->af_specific->calc_md5_hash(opts.hash_location, |
702 | md5, sk, NULL, skb); | 877 | md5, sk, NULL, skb); |
703 | } | 878 | } |
704 | #endif | 879 | #endif |
@@ -1627,11 +1802,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1627 | void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, | 1802 | void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, |
1628 | int nonagle) | 1803 | int nonagle) |
1629 | { | 1804 | { |
1630 | struct sk_buff *skb = tcp_send_head(sk); | ||
1631 | |||
1632 | if (!skb) | ||
1633 | return; | ||
1634 | |||
1635 | /* If we are closed, the bytes will have to remain here. | 1805 | /* If we are closed, the bytes will have to remain here. |
1636 | * In time closedown will finish, we empty the write queue and | 1806 | * In time closedown will finish, we empty the write queue and |
1637 | * all will be happy. | 1807 | * all will be happy. |
@@ -1918,8 +2088,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1918 | * case, when window is shrunk to zero. In this case | 2088 | * case, when window is shrunk to zero. In this case |
1919 | * our retransmit serves as a zero window probe. | 2089 | * our retransmit serves as a zero window probe. |
1920 | */ | 2090 | */ |
1921 | if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) | 2091 | if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && |
1922 | && TCP_SKB_CB(skb)->seq != tp->snd_una) | 2092 | TCP_SKB_CB(skb)->seq != tp->snd_una) |
1923 | return -EAGAIN; | 2093 | return -EAGAIN; |
1924 | 2094 | ||
1925 | if (skb->len > cur_mss) { | 2095 | if (skb->len > cur_mss) { |
@@ -2219,19 +2389,24 @@ int tcp_send_synack(struct sock *sk) | |||
2219 | 2389 | ||
2220 | /* Prepare a SYN-ACK. */ | 2390 | /* Prepare a SYN-ACK. */ |
2221 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2391 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2222 | struct request_sock *req) | 2392 | struct request_sock *req, |
2393 | struct request_values *rvp) | ||
2223 | { | 2394 | { |
2395 | struct tcp_out_options opts; | ||
2396 | struct tcp_extend_values *xvp = tcp_xv(rvp); | ||
2224 | struct inet_request_sock *ireq = inet_rsk(req); | 2397 | struct inet_request_sock *ireq = inet_rsk(req); |
2225 | struct tcp_sock *tp = tcp_sk(sk); | 2398 | struct tcp_sock *tp = tcp_sk(sk); |
2399 | const struct tcp_cookie_values *cvp = tp->cookie_values; | ||
2226 | struct tcphdr *th; | 2400 | struct tcphdr *th; |
2227 | int tcp_header_size; | ||
2228 | struct tcp_out_options opts; | ||
2229 | struct sk_buff *skb; | 2401 | struct sk_buff *skb; |
2230 | struct tcp_md5sig_key *md5; | 2402 | struct tcp_md5sig_key *md5; |
2231 | __u8 *md5_hash_location; | 2403 | int tcp_header_size; |
2232 | int mss; | 2404 | int mss; |
2405 | int s_data_desired = 0; | ||
2233 | 2406 | ||
2234 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); | 2407 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) |
2408 | s_data_desired = cvp->s_data_desired; | ||
2409 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); | ||
2235 | if (skb == NULL) | 2410 | if (skb == NULL) |
2236 | return NULL; | 2411 | return NULL; |
2237 | 2412 | ||
@@ -2254,7 +2429,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2254 | &req->rcv_wnd, | 2429 | &req->rcv_wnd, |
2255 | &req->window_clamp, | 2430 | &req->window_clamp, |
2256 | ireq->wscale_ok, | 2431 | ireq->wscale_ok, |
2257 | &rcv_wscale); | 2432 | &rcv_wscale, |
2433 | dst_metric(dst, RTAX_INITRWND)); | ||
2258 | ireq->rcv_wscale = rcv_wscale; | 2434 | ireq->rcv_wscale = rcv_wscale; |
2259 | } | 2435 | } |
2260 | 2436 | ||
@@ -2266,8 +2442,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2266 | #endif | 2442 | #endif |
2267 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2443 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2268 | tcp_header_size = tcp_synack_options(sk, req, mss, | 2444 | tcp_header_size = tcp_synack_options(sk, req, mss, |
2269 | skb, &opts, &md5) + | 2445 | skb, &opts, &md5, xvp) |
2270 | sizeof(struct tcphdr); | 2446 | + sizeof(*th); |
2271 | 2447 | ||
2272 | skb_push(skb, tcp_header_size); | 2448 | skb_push(skb, tcp_header_size); |
2273 | skb_reset_transport_header(skb); | 2449 | skb_reset_transport_header(skb); |
@@ -2284,19 +2460,54 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2284 | */ | 2460 | */ |
2285 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, | 2461 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, |
2286 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); | 2462 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); |
2463 | |||
2464 | if (OPTION_COOKIE_EXTENSION & opts.options) { | ||
2465 | if (s_data_desired) { | ||
2466 | u8 *buf = skb_put(skb, s_data_desired); | ||
2467 | |||
2468 | /* copy data directly from the listening socket. */ | ||
2469 | memcpy(buf, cvp->s_data_payload, s_data_desired); | ||
2470 | TCP_SKB_CB(skb)->end_seq += s_data_desired; | ||
2471 | } | ||
2472 | |||
2473 | if (opts.hash_size > 0) { | ||
2474 | __u32 workspace[SHA_WORKSPACE_WORDS]; | ||
2475 | u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; | ||
2476 | u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; | ||
2477 | |||
2478 | /* Secret recipe depends on the Timestamp, (future) | ||
2479 | * Sequence and Acknowledgment Numbers, Initiator | ||
2480 | * Cookie, and others handled by IP variant caller. | ||
2481 | */ | ||
2482 | *tail-- ^= opts.tsval; | ||
2483 | *tail-- ^= tcp_rsk(req)->rcv_isn + 1; | ||
2484 | *tail-- ^= TCP_SKB_CB(skb)->seq + 1; | ||
2485 | |||
2486 | /* recommended */ | ||
2487 | *tail-- ^= ((th->dest << 16) | th->source); | ||
2488 | *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ | ||
2489 | |||
2490 | sha_transform((__u32 *)&xvp->cookie_bakery[0], | ||
2491 | (char *)mess, | ||
2492 | &workspace[0]); | ||
2493 | opts.hash_location = | ||
2494 | (__u8 *)&xvp->cookie_bakery[0]; | ||
2495 | } | ||
2496 | } | ||
2497 | |||
2287 | th->seq = htonl(TCP_SKB_CB(skb)->seq); | 2498 | th->seq = htonl(TCP_SKB_CB(skb)->seq); |
2288 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); | 2499 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); |
2289 | 2500 | ||
2290 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ | 2501 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
2291 | th->window = htons(min(req->rcv_wnd, 65535U)); | 2502 | th->window = htons(min(req->rcv_wnd, 65535U)); |
2292 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); | 2503 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
2293 | th->doff = (tcp_header_size >> 2); | 2504 | th->doff = (tcp_header_size >> 2); |
2294 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); | 2505 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); |
2295 | 2506 | ||
2296 | #ifdef CONFIG_TCP_MD5SIG | 2507 | #ifdef CONFIG_TCP_MD5SIG |
2297 | /* Okay, we have all we need - do the md5 hash if needed */ | 2508 | /* Okay, we have all we need - do the md5 hash if needed */ |
2298 | if (md5) { | 2509 | if (md5) { |
2299 | tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, | 2510 | tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, |
2300 | md5, NULL, req, skb); | 2511 | md5, NULL, req, skb); |
2301 | } | 2512 | } |
2302 | #endif | 2513 | #endif |
@@ -2342,7 +2553,8 @@ static void tcp_connect_init(struct sock *sk) | |||
2342 | &tp->rcv_wnd, | 2553 | &tp->rcv_wnd, |
2343 | &tp->window_clamp, | 2554 | &tp->window_clamp, |
2344 | sysctl_tcp_window_scaling, | 2555 | sysctl_tcp_window_scaling, |
2345 | &rcv_wscale); | 2556 | &rcv_wscale, |
2557 | dst_metric(dst, RTAX_INITRWND)); | ||
2346 | 2558 | ||
2347 | tp->rx_opt.rcv_wscale = rcv_wscale; | 2559 | tp->rx_opt.rcv_wscale = rcv_wscale; |
2348 | tp->rcv_ssthresh = tp->rcv_wnd; | 2560 | tp->rcv_ssthresh = tp->rcv_wnd; |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 59f5b5e7c566..f8efada580e8 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kprobes.h> | 22 | #include <linux/kprobes.h> |
23 | #include <linux/socket.h> | 23 | #include <linux/socket.h> |
24 | #include <linux/tcp.h> | 24 | #include <linux/tcp.h> |
25 | #include <linux/slab.h> | ||
25 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
26 | #include <linux/module.h> | 27 | #include <linux/module.h> |
27 | #include <linux/ktime.h> | 28 | #include <linux/ktime.h> |
@@ -39,9 +40,9 @@ static int port __read_mostly = 0; | |||
39 | MODULE_PARM_DESC(port, "Port to match (0=all)"); | 40 | MODULE_PARM_DESC(port, "Port to match (0=all)"); |
40 | module_param(port, int, 0); | 41 | module_param(port, int, 0); |
41 | 42 | ||
42 | static int bufsize __read_mostly = 4096; | 43 | static unsigned int bufsize __read_mostly = 4096; |
43 | MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); | 44 | MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); |
44 | module_param(bufsize, int, 0); | 45 | module_param(bufsize, uint, 0); |
45 | 46 | ||
46 | static int full __read_mostly; | 47 | static int full __read_mostly; |
47 | MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); | 48 | MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); |
@@ -75,12 +76,12 @@ static struct { | |||
75 | 76 | ||
76 | static inline int tcp_probe_used(void) | 77 | static inline int tcp_probe_used(void) |
77 | { | 78 | { |
78 | return (tcp_probe.head - tcp_probe.tail) % bufsize; | 79 | return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); |
79 | } | 80 | } |
80 | 81 | ||
81 | static inline int tcp_probe_avail(void) | 82 | static inline int tcp_probe_avail(void) |
82 | { | 83 | { |
83 | return bufsize - tcp_probe_used(); | 84 | return bufsize - tcp_probe_used() - 1; |
84 | } | 85 | } |
85 | 86 | ||
86 | /* | 87 | /* |
@@ -94,8 +95,9 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
94 | const struct inet_sock *inet = inet_sk(sk); | 95 | const struct inet_sock *inet = inet_sk(sk); |
95 | 96 | ||
96 | /* Only update if port matches */ | 97 | /* Only update if port matches */ |
97 | if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) | 98 | if ((port == 0 || ntohs(inet->inet_dport) == port || |
98 | && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { | 99 | ntohs(inet->inet_sport) == port) && |
100 | (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { | ||
99 | 101 | ||
100 | spin_lock(&tcp_probe.lock); | 102 | spin_lock(&tcp_probe.lock); |
101 | /* If log fills, just silently drop */ | 103 | /* If log fills, just silently drop */ |
@@ -103,10 +105,10 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
103 | struct tcp_log *p = tcp_probe.log + tcp_probe.head; | 105 | struct tcp_log *p = tcp_probe.log + tcp_probe.head; |
104 | 106 | ||
105 | p->tstamp = ktime_get(); | 107 | p->tstamp = ktime_get(); |
106 | p->saddr = inet->saddr; | 108 | p->saddr = inet->inet_saddr; |
107 | p->sport = inet->sport; | 109 | p->sport = inet->inet_sport; |
108 | p->daddr = inet->daddr; | 110 | p->daddr = inet->inet_daddr; |
109 | p->dport = inet->dport; | 111 | p->dport = inet->inet_dport; |
110 | p->length = skb->len; | 112 | p->length = skb->len; |
111 | p->snd_nxt = tp->snd_nxt; | 113 | p->snd_nxt = tp->snd_nxt; |
112 | p->snd_una = tp->snd_una; | 114 | p->snd_una = tp->snd_una; |
@@ -115,7 +117,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
115 | p->ssthresh = tcp_current_ssthresh(sk); | 117 | p->ssthresh = tcp_current_ssthresh(sk); |
116 | p->srtt = tp->srtt >> 3; | 118 | p->srtt = tp->srtt >> 3; |
117 | 119 | ||
118 | tcp_probe.head = (tcp_probe.head + 1) % bufsize; | 120 | tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); |
119 | } | 121 | } |
120 | tcp_probe.lastcwnd = tp->snd_cwnd; | 122 | tcp_probe.lastcwnd = tp->snd_cwnd; |
121 | spin_unlock(&tcp_probe.lock); | 123 | spin_unlock(&tcp_probe.lock); |
@@ -148,7 +150,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) | |||
148 | static int tcpprobe_sprint(char *tbuf, int n) | 150 | static int tcpprobe_sprint(char *tbuf, int n) |
149 | { | 151 | { |
150 | const struct tcp_log *p | 152 | const struct tcp_log *p |
151 | = tcp_probe.log + tcp_probe.tail % bufsize; | 153 | = tcp_probe.log + tcp_probe.tail; |
152 | struct timespec tv | 154 | struct timespec tv |
153 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); | 155 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); |
154 | 156 | ||
@@ -191,7 +193,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, | |||
191 | width = tcpprobe_sprint(tbuf, sizeof(tbuf)); | 193 | width = tcpprobe_sprint(tbuf, sizeof(tbuf)); |
192 | 194 | ||
193 | if (cnt + width < len) | 195 | if (cnt + width < len) |
194 | tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; | 196 | tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1); |
195 | 197 | ||
196 | spin_unlock_bh(&tcp_probe.lock); | 198 | spin_unlock_bh(&tcp_probe.lock); |
197 | 199 | ||
@@ -221,9 +223,10 @@ static __init int tcpprobe_init(void) | |||
221 | init_waitqueue_head(&tcp_probe.wait); | 223 | init_waitqueue_head(&tcp_probe.wait); |
222 | spin_lock_init(&tcp_probe.lock); | 224 | spin_lock_init(&tcp_probe.lock); |
223 | 225 | ||
224 | if (bufsize < 0) | 226 | if (bufsize == 0) |
225 | return -EINVAL; | 227 | return -EINVAL; |
226 | 228 | ||
229 | bufsize = roundup_pow_of_two(bufsize); | ||
227 | tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); | 230 | tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); |
228 | if (!tcp_probe.log) | 231 | if (!tcp_probe.log) |
229 | goto err0; | 232 | goto err0; |
@@ -235,7 +238,7 @@ static __init int tcpprobe_init(void) | |||
235 | if (ret) | 238 | if (ret) |
236 | goto err1; | 239 | goto err1; |
237 | 240 | ||
238 | pr_info("TCP probe registered (port=%d)\n", port); | 241 | pr_info("TCP probe registered (port=%d) bufsize=%u\n", port, bufsize); |
239 | return 0; | 242 | return 0; |
240 | err1: | 243 | err1: |
241 | proc_net_remove(&init_net, procname); | 244 | proc_net_remove(&init_net, procname); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cdb2ca7684d4..8a0ab2977f1f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -19,6 +19,7 @@ | |||
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/gfp.h> | ||
22 | #include <net/tcp.h> | 23 | #include <net/tcp.h> |
23 | 24 | ||
24 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; | 25 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; |
@@ -29,6 +30,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; | |||
29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; | 30 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | 31 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
31 | int sysctl_tcp_orphan_retries __read_mostly; | 32 | int sysctl_tcp_orphan_retries __read_mostly; |
33 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | ||
32 | 34 | ||
33 | static void tcp_write_timer(unsigned long); | 35 | static void tcp_write_timer(unsigned long); |
34 | static void tcp_delack_timer(unsigned long); | 36 | static void tcp_delack_timer(unsigned long); |
@@ -132,6 +134,35 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) | |||
132 | } | 134 | } |
133 | } | 135 | } |
134 | 136 | ||
137 | /* This function calculates a "timeout" which is equivalent to the timeout of a | ||
138 | * TCP connection after "boundary" unsuccessful, exponentially backed-off | ||
139 | * retransmissions with an initial RTO of TCP_RTO_MIN. | ||
140 | */ | ||
141 | static bool retransmits_timed_out(struct sock *sk, | ||
142 | unsigned int boundary) | ||
143 | { | ||
144 | unsigned int timeout, linear_backoff_thresh; | ||
145 | unsigned int start_ts; | ||
146 | |||
147 | if (!inet_csk(sk)->icsk_retransmits) | ||
148 | return false; | ||
149 | |||
150 | if (unlikely(!tcp_sk(sk)->retrans_stamp)) | ||
151 | start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; | ||
152 | else | ||
153 | start_ts = tcp_sk(sk)->retrans_stamp; | ||
154 | |||
155 | linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); | ||
156 | |||
157 | if (boundary <= linear_backoff_thresh) | ||
158 | timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; | ||
159 | else | ||
160 | timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + | ||
161 | (boundary - linear_backoff_thresh) * TCP_RTO_MAX; | ||
162 | |||
163 | return (tcp_time_stamp - start_ts) >= timeout; | ||
164 | } | ||
165 | |||
135 | /* A write timeout has occurred. Process the after effects. */ | 166 | /* A write timeout has occurred. Process the after effects. */ |
136 | static int tcp_write_timeout(struct sock *sk) | 167 | static int tcp_write_timeout(struct sock *sk) |
137 | { | 168 | { |
@@ -141,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk) | |||
141 | 172 | ||
142 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 173 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
143 | if (icsk->icsk_retransmits) | 174 | if (icsk->icsk_retransmits) |
144 | dst_negative_advice(&sk->sk_dst_cache); | 175 | dst_negative_advice(&sk->sk_dst_cache, sk); |
145 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 176 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
146 | } else { | 177 | } else { |
147 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { | 178 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { |
148 | /* Black hole detection */ | 179 | /* Black hole detection */ |
149 | tcp_mtu_probing(icsk, sk); | 180 | tcp_mtu_probing(icsk, sk); |
150 | 181 | ||
151 | dst_negative_advice(&sk->sk_dst_cache); | 182 | dst_negative_advice(&sk->sk_dst_cache, sk); |
152 | } | 183 | } |
153 | 184 | ||
154 | retry_until = sysctl_tcp_retries2; | 185 | retry_until = sysctl_tcp_retries2; |
@@ -303,15 +334,15 @@ void tcp_retransmit_timer(struct sock *sk) | |||
303 | struct inet_sock *inet = inet_sk(sk); | 334 | struct inet_sock *inet = inet_sk(sk); |
304 | if (sk->sk_family == AF_INET) { | 335 | if (sk->sk_family == AF_INET) { |
305 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", | 336 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
306 | &inet->daddr, ntohs(inet->dport), | 337 | &inet->inet_daddr, ntohs(inet->inet_dport), |
307 | inet->num, tp->snd_una, tp->snd_nxt); | 338 | inet->inet_num, tp->snd_una, tp->snd_nxt); |
308 | } | 339 | } |
309 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 340 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
310 | else if (sk->sk_family == AF_INET6) { | 341 | else if (sk->sk_family == AF_INET6) { |
311 | struct ipv6_pinfo *np = inet6_sk(sk); | 342 | struct ipv6_pinfo *np = inet6_sk(sk); |
312 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", | 343 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
313 | &np->daddr, ntohs(inet->dport), | 344 | &np->daddr, ntohs(inet->inet_dport), |
314 | inet->num, tp->snd_una, tp->snd_nxt); | 345 | inet->inet_num, tp->snd_una, tp->snd_nxt); |
315 | } | 346 | } |
316 | #endif | 347 | #endif |
317 | #endif | 348 | #endif |
@@ -386,7 +417,25 @@ void tcp_retransmit_timer(struct sock *sk) | |||
386 | icsk->icsk_retransmits++; | 417 | icsk->icsk_retransmits++; |
387 | 418 | ||
388 | out_reset_timer: | 419 | out_reset_timer: |
389 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 420 | /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is |
421 | * used to reset timer, set to 0. Recalculate 'icsk_rto' as this | ||
422 | * might be increased if the stream oscillates between thin and thick, | ||
423 | * thus the old value might already be too high compared to the value | ||
424 | * set by 'tcp_set_rto' in tcp_input.c which resets the rto without | ||
425 | * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating | ||
426 | * exponential backoff behaviour to avoid continuing to hammer | ||
427 | * linear-timeout retransmissions into a black hole | ||
428 | */ | ||
429 | if (sk->sk_state == TCP_ESTABLISHED && | ||
430 | (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && | ||
431 | tcp_stream_is_thin(tp) && | ||
432 | icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { | ||
433 | icsk->icsk_backoff = 0; | ||
434 | icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); | ||
435 | } else { | ||
436 | /* Use normal (exponential) backoff */ | ||
437 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | ||
438 | } | ||
390 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 439 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
391 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | 440 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) |
392 | __sk_dst_reset(sk); | 441 | __sk_dst_reset(sk); |
@@ -445,6 +494,12 @@ static void tcp_synack_timer(struct sock *sk) | |||
445 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); | 494 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); |
446 | } | 495 | } |
447 | 496 | ||
497 | void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) | ||
498 | { | ||
499 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); | ||
500 | } | ||
501 | EXPORT_SYMBOL(tcp_syn_ack_timeout); | ||
502 | |||
448 | void tcp_set_keepalive(struct sock *sk, int val) | 503 | void tcp_set_keepalive(struct sock *sk, int val) |
449 | { | 504 | { |
450 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) | 505 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index e9bbff746488..b612acf76183 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
@@ -165,9 +165,8 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
165 | * every other rtt. | 165 | * every other rtt. |
166 | */ | 166 | */ |
167 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | 167 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { |
168 | if (veno->inc | 168 | if (veno->inc && |
169 | && tp->snd_cwnd < | 169 | tp->snd_cwnd < tp->snd_cwnd_clamp) { |
170 | tp->snd_cwnd_clamp) { | ||
171 | tp->snd_cwnd++; | 170 | tp->snd_cwnd++; |
172 | veno->inc = 0; | 171 | veno->inc = 0; |
173 | } else | 172 | } else |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 66b6821b984e..a0f240358892 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
@@ -157,8 +157,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
157 | 157 | ||
158 | if (queue > TCP_YEAH_ALPHA || | 158 | if (queue > TCP_YEAH_ALPHA || |
159 | rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { | 159 | rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { |
160 | if (queue > TCP_YEAH_ALPHA | 160 | if (queue > TCP_YEAH_ALPHA && |
161 | && tp->snd_cwnd > yeah->reno_count) { | 161 | tp->snd_cwnd > yeah->reno_count) { |
162 | u32 reduction = min(queue / TCP_YEAH_GAMMA , | 162 | u32 reduction = min(queue / TCP_YEAH_GAMMA , |
163 | tp->snd_cwnd >> TCP_YEAH_EPSILON); | 163 | tp->snd_cwnd >> TCP_YEAH_EPSILON); |
164 | 164 | ||
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 3959e0ca456a..3b3813cc80b9 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/mutex.h> | 8 | #include <linux/mutex.h> |
9 | #include <linux/netdevice.h> | 9 | #include <linux/netdevice.h> |
10 | #include <linux/skbuff.h> | 10 | #include <linux/skbuff.h> |
11 | #include <linux/slab.h> | ||
11 | #include <net/icmp.h> | 12 | #include <net/icmp.h> |
12 | #include <net/ip.h> | 13 | #include <net/ip.h> |
13 | #include <net/protocol.h> | 14 | #include <net/protocol.h> |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0fa9f70e4b19..c36522a0f113 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -95,6 +95,7 @@ | |||
95 | #include <linux/mm.h> | 95 | #include <linux/mm.h> |
96 | #include <linux/inet.h> | 96 | #include <linux/inet.h> |
97 | #include <linux/netdevice.h> | 97 | #include <linux/netdevice.h> |
98 | #include <linux/slab.h> | ||
98 | #include <net/tcp_states.h> | 99 | #include <net/tcp_states.h> |
99 | #include <linux/skbuff.h> | 100 | #include <linux/skbuff.h> |
100 | #include <linux/proc_fs.h> | 101 | #include <linux/proc_fs.h> |
@@ -106,7 +107,7 @@ | |||
106 | #include <net/xfrm.h> | 107 | #include <net/xfrm.h> |
107 | #include "udp_impl.h" | 108 | #include "udp_impl.h" |
108 | 109 | ||
109 | struct udp_table udp_table; | 110 | struct udp_table udp_table __read_mostly; |
110 | EXPORT_SYMBOL(udp_table); | 111 | EXPORT_SYMBOL(udp_table); |
111 | 112 | ||
112 | int sysctl_udp_mem[3] __read_mostly; | 113 | int sysctl_udp_mem[3] __read_mostly; |
@@ -121,28 +122,30 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); | |||
121 | atomic_t udp_memory_allocated; | 122 | atomic_t udp_memory_allocated; |
122 | EXPORT_SYMBOL(udp_memory_allocated); | 123 | EXPORT_SYMBOL(udp_memory_allocated); |
123 | 124 | ||
124 | #define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) | 125 | #define MAX_UDP_PORTS 65536 |
126 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) | ||
125 | 127 | ||
126 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 128 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
127 | const struct udp_hslot *hslot, | 129 | const struct udp_hslot *hslot, |
128 | unsigned long *bitmap, | 130 | unsigned long *bitmap, |
129 | struct sock *sk, | 131 | struct sock *sk, |
130 | int (*saddr_comp)(const struct sock *sk1, | 132 | int (*saddr_comp)(const struct sock *sk1, |
131 | const struct sock *sk2)) | 133 | const struct sock *sk2), |
134 | unsigned int log) | ||
132 | { | 135 | { |
133 | struct sock *sk2; | 136 | struct sock *sk2; |
134 | struct hlist_nulls_node *node; | 137 | struct hlist_nulls_node *node; |
135 | 138 | ||
136 | sk_nulls_for_each(sk2, node, &hslot->head) | 139 | sk_nulls_for_each(sk2, node, &hslot->head) |
137 | if (net_eq(sock_net(sk2), net) && | 140 | if (net_eq(sock_net(sk2), net) && |
138 | sk2 != sk && | 141 | sk2 != sk && |
139 | (bitmap || sk2->sk_hash == num) && | 142 | (bitmap || udp_sk(sk2)->udp_port_hash == num) && |
140 | (!sk2->sk_reuse || !sk->sk_reuse) && | 143 | (!sk2->sk_reuse || !sk->sk_reuse) && |
141 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 144 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || |
142 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 145 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
143 | (*saddr_comp)(sk, sk2)) { | 146 | (*saddr_comp)(sk, sk2)) { |
144 | if (bitmap) | 147 | if (bitmap) |
145 | __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, | 148 | __set_bit(udp_sk(sk2)->udp_port_hash >> log, |
146 | bitmap); | 149 | bitmap); |
147 | else | 150 | else |
148 | return 1; | 151 | return 1; |
@@ -150,18 +153,51 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, | |||
150 | return 0; | 153 | return 0; |
151 | } | 154 | } |
152 | 155 | ||
156 | /* | ||
157 | * Note: we still hold spinlock of primary hash chain, so no other writer | ||
158 | * can insert/delete a socket with local_port == num | ||
159 | */ | ||
160 | static int udp_lib_lport_inuse2(struct net *net, __u16 num, | ||
161 | struct udp_hslot *hslot2, | ||
162 | struct sock *sk, | ||
163 | int (*saddr_comp)(const struct sock *sk1, | ||
164 | const struct sock *sk2)) | ||
165 | { | ||
166 | struct sock *sk2; | ||
167 | struct hlist_nulls_node *node; | ||
168 | int res = 0; | ||
169 | |||
170 | spin_lock(&hslot2->lock); | ||
171 | udp_portaddr_for_each_entry(sk2, node, &hslot2->head) | ||
172 | if (net_eq(sock_net(sk2), net) && | ||
173 | sk2 != sk && | ||
174 | (udp_sk(sk2)->udp_port_hash == num) && | ||
175 | (!sk2->sk_reuse || !sk->sk_reuse) && | ||
176 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || | ||
177 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | ||
178 | (*saddr_comp)(sk, sk2)) { | ||
179 | res = 1; | ||
180 | break; | ||
181 | } | ||
182 | spin_unlock(&hslot2->lock); | ||
183 | return res; | ||
184 | } | ||
185 | |||
153 | /** | 186 | /** |
154 | * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 | 187 | * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 |
155 | * | 188 | * |
156 | * @sk: socket struct in question | 189 | * @sk: socket struct in question |
157 | * @snum: port number to look up | 190 | * @snum: port number to look up |
158 | * @saddr_comp: AF-dependent comparison of bound local IP addresses | 191 | * @saddr_comp: AF-dependent comparison of bound local IP addresses |
192 | * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, | ||
193 | * with NULL address | ||
159 | */ | 194 | */ |
160 | int udp_lib_get_port(struct sock *sk, unsigned short snum, | 195 | int udp_lib_get_port(struct sock *sk, unsigned short snum, |
161 | int (*saddr_comp)(const struct sock *sk1, | 196 | int (*saddr_comp)(const struct sock *sk1, |
162 | const struct sock *sk2)) | 197 | const struct sock *sk2), |
198 | unsigned int hash2_nulladdr) | ||
163 | { | 199 | { |
164 | struct udp_hslot *hslot; | 200 | struct udp_hslot *hslot, *hslot2; |
165 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 201 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
166 | int error = 1; | 202 | int error = 1; |
167 | struct net *net = sock_net(sk); | 203 | struct net *net = sock_net(sk); |
@@ -180,13 +216,14 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
180 | /* | 216 | /* |
181 | * force rand to be an odd multiple of UDP_HTABLE_SIZE | 217 | * force rand to be an odd multiple of UDP_HTABLE_SIZE |
182 | */ | 218 | */ |
183 | rand = (rand | 1) * UDP_HTABLE_SIZE; | 219 | rand = (rand | 1) * (udptable->mask + 1); |
184 | for (last = first + UDP_HTABLE_SIZE; first != last; first++) { | 220 | last = first + udptable->mask + 1; |
185 | hslot = &udptable->hash[udp_hashfn(net, first)]; | 221 | do { |
222 | hslot = udp_hashslot(udptable, net, first); | ||
186 | bitmap_zero(bitmap, PORTS_PER_CHAIN); | 223 | bitmap_zero(bitmap, PORTS_PER_CHAIN); |
187 | spin_lock_bh(&hslot->lock); | 224 | spin_lock_bh(&hslot->lock); |
188 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, | 225 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, |
189 | saddr_comp); | 226 | saddr_comp, udptable->log); |
190 | 227 | ||
191 | snum = first; | 228 | snum = first; |
192 | /* | 229 | /* |
@@ -196,25 +233,59 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
196 | */ | 233 | */ |
197 | do { | 234 | do { |
198 | if (low <= snum && snum <= high && | 235 | if (low <= snum && snum <= high && |
199 | !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) | 236 | !test_bit(snum >> udptable->log, bitmap)) |
200 | goto found; | 237 | goto found; |
201 | snum += rand; | 238 | snum += rand; |
202 | } while (snum != first); | 239 | } while (snum != first); |
203 | spin_unlock_bh(&hslot->lock); | 240 | spin_unlock_bh(&hslot->lock); |
204 | } | 241 | } while (++first != last); |
205 | goto fail; | 242 | goto fail; |
206 | } else { | 243 | } else { |
207 | hslot = &udptable->hash[udp_hashfn(net, snum)]; | 244 | hslot = udp_hashslot(udptable, net, snum); |
208 | spin_lock_bh(&hslot->lock); | 245 | spin_lock_bh(&hslot->lock); |
209 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) | 246 | if (hslot->count > 10) { |
247 | int exist; | ||
248 | unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; | ||
249 | |||
250 | slot2 &= udptable->mask; | ||
251 | hash2_nulladdr &= udptable->mask; | ||
252 | |||
253 | hslot2 = udp_hashslot2(udptable, slot2); | ||
254 | if (hslot->count < hslot2->count) | ||
255 | goto scan_primary_hash; | ||
256 | |||
257 | exist = udp_lib_lport_inuse2(net, snum, hslot2, | ||
258 | sk, saddr_comp); | ||
259 | if (!exist && (hash2_nulladdr != slot2)) { | ||
260 | hslot2 = udp_hashslot2(udptable, hash2_nulladdr); | ||
261 | exist = udp_lib_lport_inuse2(net, snum, hslot2, | ||
262 | sk, saddr_comp); | ||
263 | } | ||
264 | if (exist) | ||
265 | goto fail_unlock; | ||
266 | else | ||
267 | goto found; | ||
268 | } | ||
269 | scan_primary_hash: | ||
270 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, | ||
271 | saddr_comp, 0)) | ||
210 | goto fail_unlock; | 272 | goto fail_unlock; |
211 | } | 273 | } |
212 | found: | 274 | found: |
213 | inet_sk(sk)->num = snum; | 275 | inet_sk(sk)->inet_num = snum; |
214 | sk->sk_hash = snum; | 276 | udp_sk(sk)->udp_port_hash = snum; |
277 | udp_sk(sk)->udp_portaddr_hash ^= snum; | ||
215 | if (sk_unhashed(sk)) { | 278 | if (sk_unhashed(sk)) { |
216 | sk_nulls_add_node_rcu(sk, &hslot->head); | 279 | sk_nulls_add_node_rcu(sk, &hslot->head); |
280 | hslot->count++; | ||
217 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 281 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
282 | |||
283 | hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); | ||
284 | spin_lock(&hslot2->lock); | ||
285 | hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, | ||
286 | &hslot2->head); | ||
287 | hslot2->count++; | ||
288 | spin_unlock(&hslot2->lock); | ||
218 | } | 289 | } |
219 | error = 0; | 290 | error = 0; |
220 | fail_unlock: | 291 | fail_unlock: |
@@ -229,13 +300,26 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | |||
229 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 300 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
230 | 301 | ||
231 | return (!ipv6_only_sock(sk2) && | 302 | return (!ipv6_only_sock(sk2) && |
232 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | 303 | (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || |
233 | inet1->rcv_saddr == inet2->rcv_saddr)); | 304 | inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); |
305 | } | ||
306 | |||
307 | static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, | ||
308 | unsigned int port) | ||
309 | { | ||
310 | return jhash_1word(saddr, net_hash_mix(net)) ^ port; | ||
234 | } | 311 | } |
235 | 312 | ||
236 | int udp_v4_get_port(struct sock *sk, unsigned short snum) | 313 | int udp_v4_get_port(struct sock *sk, unsigned short snum) |
237 | { | 314 | { |
238 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); | 315 | unsigned int hash2_nulladdr = |
316 | udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); | ||
317 | unsigned int hash2_partial = | ||
318 | udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); | ||
319 | |||
320 | /* precompute partial secondary hash */ | ||
321 | udp_sk(sk)->udp_portaddr_hash = hash2_partial; | ||
322 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); | ||
239 | } | 323 | } |
240 | 324 | ||
241 | static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, | 325 | static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, |
@@ -244,23 +328,23 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, | |||
244 | { | 328 | { |
245 | int score = -1; | 329 | int score = -1; |
246 | 330 | ||
247 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | 331 | if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && |
248 | !ipv6_only_sock(sk)) { | 332 | !ipv6_only_sock(sk)) { |
249 | struct inet_sock *inet = inet_sk(sk); | 333 | struct inet_sock *inet = inet_sk(sk); |
250 | 334 | ||
251 | score = (sk->sk_family == PF_INET ? 1 : 0); | 335 | score = (sk->sk_family == PF_INET ? 1 : 0); |
252 | if (inet->rcv_saddr) { | 336 | if (inet->inet_rcv_saddr) { |
253 | if (inet->rcv_saddr != daddr) | 337 | if (inet->inet_rcv_saddr != daddr) |
254 | return -1; | 338 | return -1; |
255 | score += 2; | 339 | score += 2; |
256 | } | 340 | } |
257 | if (inet->daddr) { | 341 | if (inet->inet_daddr) { |
258 | if (inet->daddr != saddr) | 342 | if (inet->inet_daddr != saddr) |
259 | return -1; | 343 | return -1; |
260 | score += 2; | 344 | score += 2; |
261 | } | 345 | } |
262 | if (inet->dport) { | 346 | if (inet->inet_dport) { |
263 | if (inet->dport != sport) | 347 | if (inet->inet_dport != sport) |
264 | return -1; | 348 | return -1; |
265 | score += 2; | 349 | score += 2; |
266 | } | 350 | } |
@@ -273,6 +357,89 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, | |||
273 | return score; | 357 | return score; |
274 | } | 358 | } |
275 | 359 | ||
360 | /* | ||
361 | * In this second variant, we check (daddr, dport) matches (inet_rcv_saddr, inet_num) | ||
362 | */ | ||
363 | #define SCORE2_MAX (1 + 2 + 2 + 2) | ||
364 | static inline int compute_score2(struct sock *sk, struct net *net, | ||
365 | __be32 saddr, __be16 sport, | ||
366 | __be32 daddr, unsigned int hnum, int dif) | ||
367 | { | ||
368 | int score = -1; | ||
369 | |||
370 | if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) { | ||
371 | struct inet_sock *inet = inet_sk(sk); | ||
372 | |||
373 | if (inet->inet_rcv_saddr != daddr) | ||
374 | return -1; | ||
375 | if (inet->inet_num != hnum) | ||
376 | return -1; | ||
377 | |||
378 | score = (sk->sk_family == PF_INET ? 1 : 0); | ||
379 | if (inet->inet_daddr) { | ||
380 | if (inet->inet_daddr != saddr) | ||
381 | return -1; | ||
382 | score += 2; | ||
383 | } | ||
384 | if (inet->inet_dport) { | ||
385 | if (inet->inet_dport != sport) | ||
386 | return -1; | ||
387 | score += 2; | ||
388 | } | ||
389 | if (sk->sk_bound_dev_if) { | ||
390 | if (sk->sk_bound_dev_if != dif) | ||
391 | return -1; | ||
392 | score += 2; | ||
393 | } | ||
394 | } | ||
395 | return score; | ||
396 | } | ||
397 | |||
398 | |||
399 | /* called with rcu_read_lock() */ | ||
400 | static struct sock *udp4_lib_lookup2(struct net *net, | ||
401 | __be32 saddr, __be16 sport, | ||
402 | __be32 daddr, unsigned int hnum, int dif, | ||
403 | struct udp_hslot *hslot2, unsigned int slot2) | ||
404 | { | ||
405 | struct sock *sk, *result; | ||
406 | struct hlist_nulls_node *node; | ||
407 | int score, badness; | ||
408 | |||
409 | begin: | ||
410 | result = NULL; | ||
411 | badness = -1; | ||
412 | udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { | ||
413 | score = compute_score2(sk, net, saddr, sport, | ||
414 | daddr, hnum, dif); | ||
415 | if (score > badness) { | ||
416 | result = sk; | ||
417 | badness = score; | ||
418 | if (score == SCORE2_MAX) | ||
419 | goto exact_match; | ||
420 | } | ||
421 | } | ||
422 | /* | ||
423 | * if the nulls value we got at the end of this lookup is | ||
424 | * not the expected one, we must restart lookup. | ||
425 | * We probably met an item that was moved to another chain. | ||
426 | */ | ||
427 | if (get_nulls_value(node) != slot2) | ||
428 | goto begin; | ||
429 | |||
430 | if (result) { | ||
431 | exact_match: | ||
432 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | ||
433 | result = NULL; | ||
434 | else if (unlikely(compute_score2(result, net, saddr, sport, | ||
435 | daddr, hnum, dif) < badness)) { | ||
436 | sock_put(result); | ||
437 | goto begin; | ||
438 | } | ||
439 | } | ||
440 | return result; | ||
441 | } | ||
442 | |||
276 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 443 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
277 | * harder than this. -DaveM | 444 | * harder than this. -DaveM |
278 | */ | 445 | */ |
@@ -283,11 +450,35 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
283 | struct sock *sk, *result; | 450 | struct sock *sk, *result; |
284 | struct hlist_nulls_node *node; | 451 | struct hlist_nulls_node *node; |
285 | unsigned short hnum = ntohs(dport); | 452 | unsigned short hnum = ntohs(dport); |
286 | unsigned int hash = udp_hashfn(net, hnum); | 453 | unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); |
287 | struct udp_hslot *hslot = &udptable->hash[hash]; | 454 | struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; |
288 | int score, badness; | 455 | int score, badness; |
289 | 456 | ||
290 | rcu_read_lock(); | 457 | rcu_read_lock(); |
458 | if (hslot->count > 10) { | ||
459 | hash2 = udp4_portaddr_hash(net, daddr, hnum); | ||
460 | slot2 = hash2 & udptable->mask; | ||
461 | hslot2 = &udptable->hash2[slot2]; | ||
462 | if (hslot->count < hslot2->count) | ||
463 | goto begin; | ||
464 | |||
465 | result = udp4_lib_lookup2(net, saddr, sport, | ||
466 | daddr, hnum, dif, | ||
467 | hslot2, slot2); | ||
468 | if (!result) { | ||
469 | hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); | ||
470 | slot2 = hash2 & udptable->mask; | ||
471 | hslot2 = &udptable->hash2[slot2]; | ||
472 | if (hslot->count < hslot2->count) | ||
473 | goto begin; | ||
474 | |||
475 | result = udp4_lib_lookup2(net, saddr, sport, | ||
476 | INADDR_ANY, hnum, dif, | ||
477 | hslot2, slot2); | ||
478 | } | ||
479 | rcu_read_unlock(); | ||
480 | return result; | ||
481 | } | ||
291 | begin: | 482 | begin: |
292 | result = NULL; | 483 | result = NULL; |
293 | badness = -1; | 484 | badness = -1; |
@@ -304,7 +495,7 @@ begin: | |||
304 | * not the expected one, we must restart lookup. | 495 | * not the expected one, we must restart lookup. |
305 | * We probably met an item that was moved to another chain. | 496 | * We probably met an item that was moved to another chain. |
306 | */ | 497 | */ |
307 | if (get_nulls_value(node) != hash) | 498 | if (get_nulls_value(node) != slot) |
308 | goto begin; | 499 | goto begin; |
309 | 500 | ||
310 | if (result) { | 501 | if (result) { |
@@ -354,12 +545,13 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, | |||
354 | sk_nulls_for_each_from(s, node) { | 545 | sk_nulls_for_each_from(s, node) { |
355 | struct inet_sock *inet = inet_sk(s); | 546 | struct inet_sock *inet = inet_sk(s); |
356 | 547 | ||
357 | if (!net_eq(sock_net(s), net) || | 548 | if (!net_eq(sock_net(s), net) || |
358 | s->sk_hash != hnum || | 549 | udp_sk(s)->udp_port_hash != hnum || |
359 | (inet->daddr && inet->daddr != rmt_addr) || | 550 | (inet->inet_daddr && inet->inet_daddr != rmt_addr) || |
360 | (inet->dport != rmt_port && inet->dport) || | 551 | (inet->inet_dport != rmt_port && inet->inet_dport) || |
361 | (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || | 552 | (inet->inet_rcv_saddr && |
362 | ipv6_only_sock(s) || | 553 | inet->inet_rcv_saddr != loc_addr) || |
554 | ipv6_only_sock(s) || | ||
363 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) | 555 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) |
364 | continue; | 556 | continue; |
365 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) | 557 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) |
@@ -642,14 +834,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
642 | } else { | 834 | } else { |
643 | if (sk->sk_state != TCP_ESTABLISHED) | 835 | if (sk->sk_state != TCP_ESTABLISHED) |
644 | return -EDESTADDRREQ; | 836 | return -EDESTADDRREQ; |
645 | daddr = inet->daddr; | 837 | daddr = inet->inet_daddr; |
646 | dport = inet->dport; | 838 | dport = inet->inet_dport; |
647 | /* Open fast path for connected socket. | 839 | /* Open fast path for connected socket. |
648 | Route will not be used, if at least one option is set. | 840 | Route will not be used, if at least one option is set. |
649 | */ | 841 | */ |
650 | connected = 1; | 842 | connected = 1; |
651 | } | 843 | } |
652 | ipc.addr = inet->saddr; | 844 | ipc.addr = inet->inet_saddr; |
653 | 845 | ||
654 | ipc.oif = sk->sk_bound_dev_if; | 846 | ipc.oif = sk->sk_bound_dev_if; |
655 | err = sock_tx_timestamp(msg, sk, &ipc.shtx); | 847 | err = sock_tx_timestamp(msg, sk, &ipc.shtx); |
@@ -704,7 +896,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
704 | .proto = sk->sk_protocol, | 896 | .proto = sk->sk_protocol, |
705 | .flags = inet_sk_flowi_flags(sk), | 897 | .flags = inet_sk_flowi_flags(sk), |
706 | .uli_u = { .ports = | 898 | .uli_u = { .ports = |
707 | { .sport = inet->sport, | 899 | { .sport = inet->inet_sport, |
708 | .dport = dport } } }; | 900 | .dport = dport } } }; |
709 | struct net *net = sock_net(sk); | 901 | struct net *net = sock_net(sk); |
710 | 902 | ||
@@ -748,7 +940,7 @@ back_from_confirm: | |||
748 | inet->cork.fl.fl4_dst = daddr; | 940 | inet->cork.fl.fl4_dst = daddr; |
749 | inet->cork.fl.fl_ip_dport = dport; | 941 | inet->cork.fl.fl_ip_dport = dport; |
750 | inet->cork.fl.fl4_src = saddr; | 942 | inet->cork.fl.fl4_src = saddr; |
751 | inet->cork.fl.fl_ip_sport = inet->sport; | 943 | inet->cork.fl.fl_ip_sport = inet->inet_sport; |
752 | up->pending = AF_INET; | 944 | up->pending = AF_INET; |
753 | 945 | ||
754 | do_append_data: | 946 | do_append_data: |
@@ -862,6 +1054,7 @@ static unsigned int first_packet_length(struct sock *sk) | |||
862 | udp_lib_checksum_complete(skb)) { | 1054 | udp_lib_checksum_complete(skb)) { |
863 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, | 1055 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, |
864 | IS_UDPLITE(sk)); | 1056 | IS_UDPLITE(sk)); |
1057 | atomic_inc(&sk->sk_drops); | ||
865 | __skb_unlink(skb, rcvq); | 1058 | __skb_unlink(skb, rcvq); |
866 | __skb_queue_tail(&list_kill, skb); | 1059 | __skb_queue_tail(&list_kill, skb); |
867 | } | 1060 | } |
@@ -925,7 +1118,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
925 | struct inet_sock *inet = inet_sk(sk); | 1118 | struct inet_sock *inet = inet_sk(sk); |
926 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | 1119 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; |
927 | struct sk_buff *skb; | 1120 | struct sk_buff *skb; |
928 | unsigned int ulen, copied; | 1121 | unsigned int ulen; |
929 | int peeked; | 1122 | int peeked; |
930 | int err; | 1123 | int err; |
931 | int is_udplite = IS_UDPLITE(sk); | 1124 | int is_udplite = IS_UDPLITE(sk); |
@@ -946,10 +1139,9 @@ try_again: | |||
946 | goto out; | 1139 | goto out; |
947 | 1140 | ||
948 | ulen = skb->len - sizeof(struct udphdr); | 1141 | ulen = skb->len - sizeof(struct udphdr); |
949 | copied = len; | 1142 | if (len > ulen) |
950 | if (copied > ulen) | 1143 | len = ulen; |
951 | copied = ulen; | 1144 | else if (len < ulen) |
952 | else if (copied < ulen) | ||
953 | msg->msg_flags |= MSG_TRUNC; | 1145 | msg->msg_flags |= MSG_TRUNC; |
954 | 1146 | ||
955 | /* | 1147 | /* |
@@ -958,14 +1150,14 @@ try_again: | |||
958 | * coverage checksum (UDP-Lite), do it before the copy. | 1150 | * coverage checksum (UDP-Lite), do it before the copy. |
959 | */ | 1151 | */ |
960 | 1152 | ||
961 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | 1153 | if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { |
962 | if (udp_lib_checksum_complete(skb)) | 1154 | if (udp_lib_checksum_complete(skb)) |
963 | goto csum_copy_err; | 1155 | goto csum_copy_err; |
964 | } | 1156 | } |
965 | 1157 | ||
966 | if (skb_csum_unnecessary(skb)) | 1158 | if (skb_csum_unnecessary(skb)) |
967 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 1159 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
968 | msg->msg_iov, copied); | 1160 | msg->msg_iov, len); |
969 | else { | 1161 | else { |
970 | err = skb_copy_and_csum_datagram_iovec(skb, | 1162 | err = skb_copy_and_csum_datagram_iovec(skb, |
971 | sizeof(struct udphdr), | 1163 | sizeof(struct udphdr), |
@@ -982,7 +1174,7 @@ try_again: | |||
982 | UDP_INC_STATS_USER(sock_net(sk), | 1174 | UDP_INC_STATS_USER(sock_net(sk), |
983 | UDP_MIB_INDATAGRAMS, is_udplite); | 1175 | UDP_MIB_INDATAGRAMS, is_udplite); |
984 | 1176 | ||
985 | sock_recv_timestamp(msg, sk, skb); | 1177 | sock_recv_ts_and_drops(msg, sk, skb); |
986 | 1178 | ||
987 | /* Copy the address. */ | 1179 | /* Copy the address. */ |
988 | if (sin) { | 1180 | if (sin) { |
@@ -994,7 +1186,7 @@ try_again: | |||
994 | if (inet->cmsg_flags) | 1186 | if (inet->cmsg_flags) |
995 | ip_cmsg_recv(msg, skb); | 1187 | ip_cmsg_recv(msg, skb); |
996 | 1188 | ||
997 | err = copied; | 1189 | err = len; |
998 | if (flags & MSG_TRUNC) | 1190 | if (flags & MSG_TRUNC) |
999 | err = ulen; | 1191 | err = ulen; |
1000 | 1192 | ||
@@ -1023,15 +1215,15 @@ int udp_disconnect(struct sock *sk, int flags) | |||
1023 | */ | 1215 | */ |
1024 | 1216 | ||
1025 | sk->sk_state = TCP_CLOSE; | 1217 | sk->sk_state = TCP_CLOSE; |
1026 | inet->daddr = 0; | 1218 | inet->inet_daddr = 0; |
1027 | inet->dport = 0; | 1219 | inet->inet_dport = 0; |
1028 | sk->sk_bound_dev_if = 0; | 1220 | sk->sk_bound_dev_if = 0; |
1029 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | 1221 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
1030 | inet_reset_saddr(sk); | 1222 | inet_reset_saddr(sk); |
1031 | 1223 | ||
1032 | if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { | 1224 | if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { |
1033 | sk->sk_prot->unhash(sk); | 1225 | sk->sk_prot->unhash(sk); |
1034 | inet->sport = 0; | 1226 | inet->inet_sport = 0; |
1035 | } | 1227 | } |
1036 | sk_dst_reset(sk); | 1228 | sk_dst_reset(sk); |
1037 | return 0; | 1229 | return 0; |
@@ -1042,13 +1234,22 @@ void udp_lib_unhash(struct sock *sk) | |||
1042 | { | 1234 | { |
1043 | if (sk_hashed(sk)) { | 1235 | if (sk_hashed(sk)) { |
1044 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 1236 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
1045 | unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); | 1237 | struct udp_hslot *hslot, *hslot2; |
1046 | struct udp_hslot *hslot = &udptable->hash[hash]; | 1238 | |
1239 | hslot = udp_hashslot(udptable, sock_net(sk), | ||
1240 | udp_sk(sk)->udp_port_hash); | ||
1241 | hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); | ||
1047 | 1242 | ||
1048 | spin_lock_bh(&hslot->lock); | 1243 | spin_lock_bh(&hslot->lock); |
1049 | if (sk_nulls_del_node_init_rcu(sk)) { | 1244 | if (sk_nulls_del_node_init_rcu(sk)) { |
1050 | inet_sk(sk)->num = 0; | 1245 | hslot->count--; |
1246 | inet_sk(sk)->inet_num = 0; | ||
1051 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 1247 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
1248 | |||
1249 | spin_lock(&hslot2->lock); | ||
1250 | hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); | ||
1251 | hslot2->count--; | ||
1252 | spin_unlock(&hslot2->lock); | ||
1052 | } | 1253 | } |
1053 | spin_unlock_bh(&hslot->lock); | 1254 | spin_unlock_bh(&hslot->lock); |
1054 | } | 1255 | } |
@@ -1057,25 +1258,22 @@ EXPORT_SYMBOL(udp_lib_unhash); | |||
1057 | 1258 | ||
1058 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 1259 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
1059 | { | 1260 | { |
1060 | int is_udplite = IS_UDPLITE(sk); | 1261 | int rc = sock_queue_rcv_skb(sk, skb); |
1061 | int rc; | 1262 | |
1263 | if (rc < 0) { | ||
1264 | int is_udplite = IS_UDPLITE(sk); | ||
1062 | 1265 | ||
1063 | if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { | ||
1064 | /* Note that an ENOMEM error is charged twice */ | 1266 | /* Note that an ENOMEM error is charged twice */ |
1065 | if (rc == -ENOMEM) { | 1267 | if (rc == -ENOMEM) |
1066 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, | 1268 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, |
1067 | is_udplite); | 1269 | is_udplite); |
1068 | atomic_inc(&sk->sk_drops); | 1270 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1069 | } | 1271 | kfree_skb(skb); |
1070 | goto drop; | 1272 | return -1; |
1071 | } | 1273 | } |
1072 | 1274 | ||
1073 | return 0; | 1275 | return 0; |
1074 | 1276 | ||
1075 | drop: | ||
1076 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | ||
1077 | kfree_skb(skb); | ||
1078 | return -1; | ||
1079 | } | 1277 | } |
1080 | 1278 | ||
1081 | /* returns: | 1279 | /* returns: |
@@ -1174,61 +1372,98 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1174 | bh_lock_sock(sk); | 1372 | bh_lock_sock(sk); |
1175 | if (!sock_owned_by_user(sk)) | 1373 | if (!sock_owned_by_user(sk)) |
1176 | rc = __udp_queue_rcv_skb(sk, skb); | 1374 | rc = __udp_queue_rcv_skb(sk, skb); |
1177 | else | 1375 | else if (sk_add_backlog(sk, skb)) { |
1178 | sk_add_backlog(sk, skb); | 1376 | bh_unlock_sock(sk); |
1377 | goto drop; | ||
1378 | } | ||
1179 | bh_unlock_sock(sk); | 1379 | bh_unlock_sock(sk); |
1180 | 1380 | ||
1181 | return rc; | 1381 | return rc; |
1182 | 1382 | ||
1183 | drop: | 1383 | drop: |
1184 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1384 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1385 | atomic_inc(&sk->sk_drops); | ||
1185 | kfree_skb(skb); | 1386 | kfree_skb(skb); |
1186 | return -1; | 1387 | return -1; |
1187 | } | 1388 | } |
1188 | 1389 | ||
1390 | |||
1391 | static void flush_stack(struct sock **stack, unsigned int count, | ||
1392 | struct sk_buff *skb, unsigned int final) | ||
1393 | { | ||
1394 | unsigned int i; | ||
1395 | struct sk_buff *skb1 = NULL; | ||
1396 | struct sock *sk; | ||
1397 | |||
1398 | for (i = 0; i < count; i++) { | ||
1399 | sk = stack[i]; | ||
1400 | if (likely(skb1 == NULL)) | ||
1401 | skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); | ||
1402 | |||
1403 | if (!skb1) { | ||
1404 | atomic_inc(&sk->sk_drops); | ||
1405 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, | ||
1406 | IS_UDPLITE(sk)); | ||
1407 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, | ||
1408 | IS_UDPLITE(sk)); | ||
1409 | } | ||
1410 | |||
1411 | if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) | ||
1412 | skb1 = NULL; | ||
1413 | } | ||
1414 | if (unlikely(skb1)) | ||
1415 | kfree_skb(skb1); | ||
1416 | } | ||
1417 | |||
1189 | /* | 1418 | /* |
1190 | * Multicasts and broadcasts go to each listener. | 1419 | * Multicasts and broadcasts go to each listener. |
1191 | * | 1420 | * |
1192 | * Note: called only from the BH handler context, | 1421 | * Note: called only from the BH handler context. |
1193 | * so we don't need to lock the hashes. | ||
1194 | */ | 1422 | */ |
1195 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | 1423 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, |
1196 | struct udphdr *uh, | 1424 | struct udphdr *uh, |
1197 | __be32 saddr, __be32 daddr, | 1425 | __be32 saddr, __be32 daddr, |
1198 | struct udp_table *udptable) | 1426 | struct udp_table *udptable) |
1199 | { | 1427 | { |
1200 | struct sock *sk; | 1428 | struct sock *sk, *stack[256 / sizeof(struct sock *)]; |
1201 | struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; | 1429 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); |
1202 | int dif; | 1430 | int dif; |
1431 | unsigned int i, count = 0; | ||
1203 | 1432 | ||
1204 | spin_lock(&hslot->lock); | 1433 | spin_lock(&hslot->lock); |
1205 | sk = sk_nulls_head(&hslot->head); | 1434 | sk = sk_nulls_head(&hslot->head); |
1206 | dif = skb->dev->ifindex; | 1435 | dif = skb->dev->ifindex; |
1207 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 1436 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
1208 | if (sk) { | 1437 | while (sk) { |
1209 | struct sock *sknext = NULL; | 1438 | stack[count++] = sk; |
1439 | sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, | ||
1440 | daddr, uh->source, saddr, dif); | ||
1441 | if (unlikely(count == ARRAY_SIZE(stack))) { | ||
1442 | if (!sk) | ||
1443 | break; | ||
1444 | flush_stack(stack, count, skb, ~0); | ||
1445 | count = 0; | ||
1446 | } | ||
1447 | } | ||
1448 | /* | ||
1449 | * before releasing chain lock, we must take a reference on sockets | ||
1450 | */ | ||
1451 | for (i = 0; i < count; i++) | ||
1452 | sock_hold(stack[i]); | ||
1210 | 1453 | ||
1211 | do { | ||
1212 | struct sk_buff *skb1 = skb; | ||
1213 | |||
1214 | sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, | ||
1215 | daddr, uh->source, saddr, | ||
1216 | dif); | ||
1217 | if (sknext) | ||
1218 | skb1 = skb_clone(skb, GFP_ATOMIC); | ||
1219 | |||
1220 | if (skb1) { | ||
1221 | int ret = udp_queue_rcv_skb(sk, skb1); | ||
1222 | if (ret > 0) | ||
1223 | /* we should probably re-process instead | ||
1224 | * of dropping packets here. */ | ||
1225 | kfree_skb(skb1); | ||
1226 | } | ||
1227 | sk = sknext; | ||
1228 | } while (sknext); | ||
1229 | } else | ||
1230 | consume_skb(skb); | ||
1231 | spin_unlock(&hslot->lock); | 1454 | spin_unlock(&hslot->lock); |
1455 | |||
1456 | /* | ||
1457 | * do the slow work with no lock held | ||
1458 | */ | ||
1459 | if (count) { | ||
1460 | flush_stack(stack, count, skb, count - 1); | ||
1461 | |||
1462 | for (i = 0; i < count; i++) | ||
1463 | sock_put(stack[i]); | ||
1464 | } else { | ||
1465 | kfree_skb(skb); | ||
1466 | } | ||
1232 | return 0; | 1467 | return 0; |
1233 | } | 1468 | } |
1234 | 1469 | ||
@@ -1292,6 +1527,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, | |||
1292 | 1527 | ||
1293 | uh = udp_hdr(skb); | 1528 | uh = udp_hdr(skb); |
1294 | ulen = ntohs(uh->len); | 1529 | ulen = ntohs(uh->len); |
1530 | saddr = ip_hdr(skb)->saddr; | ||
1531 | daddr = ip_hdr(skb)->daddr; | ||
1532 | |||
1295 | if (ulen > skb->len) | 1533 | if (ulen > skb->len) |
1296 | goto short_packet; | 1534 | goto short_packet; |
1297 | 1535 | ||
@@ -1305,9 +1543,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, | |||
1305 | if (udp4_csum_init(skb, uh, proto)) | 1543 | if (udp4_csum_init(skb, uh, proto)) |
1306 | goto csum_error; | 1544 | goto csum_error; |
1307 | 1545 | ||
1308 | saddr = ip_hdr(skb)->saddr; | ||
1309 | daddr = ip_hdr(skb)->daddr; | ||
1310 | |||
1311 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1546 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1312 | return __udp4_lib_mcast_deliver(net, skb, uh, | 1547 | return __udp4_lib_mcast_deliver(net, skb, uh, |
1313 | saddr, daddr, udptable); | 1548 | saddr, daddr, udptable); |
@@ -1620,9 +1855,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) | |||
1620 | struct udp_iter_state *state = seq->private; | 1855 | struct udp_iter_state *state = seq->private; |
1621 | struct net *net = seq_file_net(seq); | 1856 | struct net *net = seq_file_net(seq); |
1622 | 1857 | ||
1623 | for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1858 | for (state->bucket = start; state->bucket <= state->udp_table->mask; |
1859 | ++state->bucket) { | ||
1624 | struct hlist_nulls_node *node; | 1860 | struct hlist_nulls_node *node; |
1625 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; | 1861 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; |
1862 | |||
1863 | if (hlist_nulls_empty(&hslot->head)) | ||
1864 | continue; | ||
1865 | |||
1626 | spin_lock_bh(&hslot->lock); | 1866 | spin_lock_bh(&hslot->lock); |
1627 | sk_nulls_for_each(sk, node, &hslot->head) { | 1867 | sk_nulls_for_each(sk, node, &hslot->head) { |
1628 | if (!net_eq(sock_net(sk), net)) | 1868 | if (!net_eq(sock_net(sk), net)) |
@@ -1647,7 +1887,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | |||
1647 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1887 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1648 | 1888 | ||
1649 | if (!sk) { | 1889 | if (!sk) { |
1650 | if (state->bucket < UDP_HTABLE_SIZE) | 1890 | if (state->bucket <= state->udp_table->mask) |
1651 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1891 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1652 | return udp_get_first(seq, state->bucket + 1); | 1892 | return udp_get_first(seq, state->bucket + 1); |
1653 | } | 1893 | } |
@@ -1667,7 +1907,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | |||
1667 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) | 1907 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) |
1668 | { | 1908 | { |
1669 | struct udp_iter_state *state = seq->private; | 1909 | struct udp_iter_state *state = seq->private; |
1670 | state->bucket = UDP_HTABLE_SIZE; | 1910 | state->bucket = MAX_UDP_PORTS; |
1671 | 1911 | ||
1672 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; | 1912 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; |
1673 | } | 1913 | } |
@@ -1689,7 +1929,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) | |||
1689 | { | 1929 | { |
1690 | struct udp_iter_state *state = seq->private; | 1930 | struct udp_iter_state *state = seq->private; |
1691 | 1931 | ||
1692 | if (state->bucket < UDP_HTABLE_SIZE) | 1932 | if (state->bucket <= state->udp_table->mask) |
1693 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1933 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1694 | } | 1934 | } |
1695 | 1935 | ||
@@ -1744,12 +1984,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, | |||
1744 | int bucket, int *len) | 1984 | int bucket, int *len) |
1745 | { | 1985 | { |
1746 | struct inet_sock *inet = inet_sk(sp); | 1986 | struct inet_sock *inet = inet_sk(sp); |
1747 | __be32 dest = inet->daddr; | 1987 | __be32 dest = inet->inet_daddr; |
1748 | __be32 src = inet->rcv_saddr; | 1988 | __be32 src = inet->inet_rcv_saddr; |
1749 | __u16 destp = ntohs(inet->dport); | 1989 | __u16 destp = ntohs(inet->inet_dport); |
1750 | __u16 srcp = ntohs(inet->sport); | 1990 | __u16 srcp = ntohs(inet->inet_sport); |
1751 | 1991 | ||
1752 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" | 1992 | seq_printf(f, "%5d: %08X:%04X %08X:%04X" |
1753 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", | 1993 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", |
1754 | bucket, src, srcp, dest, destp, sp->sk_state, | 1994 | bucket, src, srcp, dest, destp, sp->sk_state, |
1755 | sk_wmem_alloc_get(sp), | 1995 | sk_wmem_alloc_get(sp), |
@@ -1789,12 +2029,12 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { | |||
1789 | }, | 2029 | }, |
1790 | }; | 2030 | }; |
1791 | 2031 | ||
1792 | static int udp4_proc_init_net(struct net *net) | 2032 | static int __net_init udp4_proc_init_net(struct net *net) |
1793 | { | 2033 | { |
1794 | return udp_proc_register(net, &udp4_seq_afinfo); | 2034 | return udp_proc_register(net, &udp4_seq_afinfo); |
1795 | } | 2035 | } |
1796 | 2036 | ||
1797 | static void udp4_proc_exit_net(struct net *net) | 2037 | static void __net_exit udp4_proc_exit_net(struct net *net) |
1798 | { | 2038 | { |
1799 | udp_proc_unregister(net, &udp4_seq_afinfo); | 2039 | udp_proc_unregister(net, &udp4_seq_afinfo); |
1800 | } | 2040 | } |
@@ -1815,21 +2055,60 @@ void udp4_proc_exit(void) | |||
1815 | } | 2055 | } |
1816 | #endif /* CONFIG_PROC_FS */ | 2056 | #endif /* CONFIG_PROC_FS */ |
1817 | 2057 | ||
1818 | void __init udp_table_init(struct udp_table *table) | 2058 | static __initdata unsigned long uhash_entries; |
2059 | static int __init set_uhash_entries(char *str) | ||
1819 | { | 2060 | { |
1820 | int i; | 2061 | if (!str) |
2062 | return 0; | ||
2063 | uhash_entries = simple_strtoul(str, &str, 0); | ||
2064 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) | ||
2065 | uhash_entries = UDP_HTABLE_SIZE_MIN; | ||
2066 | return 1; | ||
2067 | } | ||
2068 | __setup("uhash_entries=", set_uhash_entries); | ||
1821 | 2069 | ||
1822 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { | 2070 | void __init udp_table_init(struct udp_table *table, const char *name) |
2071 | { | ||
2072 | unsigned int i; | ||
2073 | |||
2074 | if (!CONFIG_BASE_SMALL) | ||
2075 | table->hash = alloc_large_system_hash(name, | ||
2076 | 2 * sizeof(struct udp_hslot), | ||
2077 | uhash_entries, | ||
2078 | 21, /* one slot per 2 MB */ | ||
2079 | 0, | ||
2080 | &table->log, | ||
2081 | &table->mask, | ||
2082 | 64 * 1024); | ||
2083 | /* | ||
2084 | * Make sure hash table has the minimum size | ||
2085 | */ | ||
2086 | if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { | ||
2087 | table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * | ||
2088 | 2 * sizeof(struct udp_hslot), GFP_KERNEL); | ||
2089 | if (!table->hash) | ||
2090 | panic(name); | ||
2091 | table->log = ilog2(UDP_HTABLE_SIZE_MIN); | ||
2092 | table->mask = UDP_HTABLE_SIZE_MIN - 1; | ||
2093 | } | ||
2094 | table->hash2 = table->hash + (table->mask + 1); | ||
2095 | for (i = 0; i <= table->mask; i++) { | ||
1823 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); | 2096 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); |
2097 | table->hash[i].count = 0; | ||
1824 | spin_lock_init(&table->hash[i].lock); | 2098 | spin_lock_init(&table->hash[i].lock); |
1825 | } | 2099 | } |
2100 | for (i = 0; i <= table->mask; i++) { | ||
2101 | INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); | ||
2102 | table->hash2[i].count = 0; | ||
2103 | spin_lock_init(&table->hash2[i].lock); | ||
2104 | } | ||
1826 | } | 2105 | } |
1827 | 2106 | ||
1828 | void __init udp_init(void) | 2107 | void __init udp_init(void) |
1829 | { | 2108 | { |
1830 | unsigned long nr_pages, limit; | 2109 | unsigned long nr_pages, limit; |
1831 | 2110 | ||
1832 | udp_table_init(&udp_table); | 2111 | udp_table_init(&udp_table, "UDP"); |
1833 | /* Set the pressure threshold up by the same strategy of TCP. It is a | 2112 | /* Set the pressure threshold up by the same strategy of TCP. It is a |
1834 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | 2113 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing |
1835 | * toward zero with the amount of memory, with a floor of 128 pages. | 2114 | * toward zero with the amount of memory, with a floor of 128 pages. |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 95248d7f75ec..6610bf76369f 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
@@ -12,7 +12,7 @@ | |||
12 | */ | 12 | */ |
13 | #include "udp_impl.h" | 13 | #include "udp_impl.h" |
14 | 14 | ||
15 | struct udp_table udplite_table; | 15 | struct udp_table udplite_table __read_mostly; |
16 | EXPORT_SYMBOL(udplite_table); | 16 | EXPORT_SYMBOL(udplite_table); |
17 | 17 | ||
18 | static int udplite_rcv(struct sk_buff *skb) | 18 | static int udplite_rcv(struct sk_buff *skb) |
@@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = { | |||
64 | .protocol = IPPROTO_UDPLITE, | 64 | .protocol = IPPROTO_UDPLITE, |
65 | .prot = &udplite_prot, | 65 | .prot = &udplite_prot, |
66 | .ops = &inet_dgram_ops, | 66 | .ops = &inet_dgram_ops, |
67 | .capability = -1, | ||
68 | .no_check = 0, /* must checksum (RFC 3828) */ | 67 | .no_check = 0, /* must checksum (RFC 3828) */ |
69 | .flags = INET_PROTOSW_PERMANENT, | 68 | .flags = INET_PROTOSW_PERMANENT, |
70 | }; | 69 | }; |
@@ -82,12 +81,12 @@ static struct udp_seq_afinfo udplite4_seq_afinfo = { | |||
82 | }, | 81 | }, |
83 | }; | 82 | }; |
84 | 83 | ||
85 | static int udplite4_proc_init_net(struct net *net) | 84 | static int __net_init udplite4_proc_init_net(struct net *net) |
86 | { | 85 | { |
87 | return udp_proc_register(net, &udplite4_seq_afinfo); | 86 | return udp_proc_register(net, &udplite4_seq_afinfo); |
88 | } | 87 | } |
89 | 88 | ||
90 | static void udplite4_proc_exit_net(struct net *net) | 89 | static void __net_exit udplite4_proc_exit_net(struct net *net) |
91 | { | 90 | { |
92 | udp_proc_unregister(net, &udplite4_seq_afinfo); | 91 | udp_proc_unregister(net, &udplite4_seq_afinfo); |
93 | } | 92 | } |
@@ -110,7 +109,7 @@ static inline int udplite4_proc_init(void) | |||
110 | 109 | ||
111 | void __init udplite4_register(void) | 110 | void __init udplite4_register(void) |
112 | { | 111 | { |
113 | udp_table_init(&udplite_table); | 112 | udp_table_init(&udplite_table, "UDP-Lite"); |
114 | if (proto_register(&udplite_prot, 1)) | 113 | if (proto_register(&udplite_prot, 1)) |
115 | goto out_register_err; | 114 | goto out_register_err; |
116 | 115 | ||
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f9f922a0ba88..c791bb63203f 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -9,6 +9,7 @@ | |||
9 | * | 9 | * |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/slab.h> | ||
12 | #include <linux/module.h> | 13 | #include <linux/module.h> |
13 | #include <linux/string.h> | 14 | #include <linux/string.h> |
14 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> |
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 3444f3b34eca..6f368413eb0e 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> | 4 | * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/gfp.h> | ||
7 | #include <linux/init.h> | 8 | #include <linux/init.h> |
8 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
9 | #include <linux/module.h> | 10 | #include <linux/module.h> |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 74fb2eb833ec..e4a1483fba77 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <net/xfrm.h> | 15 | #include <net/xfrm.h> |
16 | #include <net/ip.h> | 16 | #include <net/ip.h> |
17 | 17 | ||
18 | static struct dst_ops xfrm4_dst_ops; | ||
19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; | 18 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
20 | 19 | ||
21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | 20 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, |
@@ -92,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | |||
92 | return 0; | 91 | return 0; |
93 | } | 92 | } |
94 | 93 | ||
95 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) | 94 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, |
95 | struct flowi *fl) | ||
96 | { | 96 | { |
97 | struct rtable *rt = (struct rtable *)xdst->route; | 97 | struct rtable *rt = (struct rtable *)xdst->route; |
98 | 98 | ||
99 | xdst->u.rt.fl = rt->fl; | 99 | xdst->u.rt.fl = *fl; |
100 | 100 | ||
101 | xdst->u.dst.dev = dev; | 101 | xdst->u.dst.dev = dev; |
102 | dev_hold(dev); | 102 | dev_hold(dev); |
@@ -190,8 +190,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
190 | 190 | ||
191 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) | 191 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) |
192 | { | 192 | { |
193 | xfrm4_policy_afinfo.garbage_collect(&init_net); | 193 | struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); |
194 | return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); | 194 | |
195 | xfrm4_policy_afinfo.garbage_collect(net); | ||
196 | return (atomic_read(&ops->entries) > ops->gc_thresh * 2); | ||
195 | } | 197 | } |
196 | 198 | ||
197 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) | 199 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -267,9 +269,8 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
267 | #ifdef CONFIG_SYSCTL | 269 | #ifdef CONFIG_SYSCTL |
268 | static struct ctl_table xfrm4_policy_table[] = { | 270 | static struct ctl_table xfrm4_policy_table[] = { |
269 | { | 271 | { |
270 | .ctl_name = CTL_UNNUMBERED, | ||
271 | .procname = "xfrm4_gc_thresh", | 272 | .procname = "xfrm4_gc_thresh", |
272 | .data = &xfrm4_dst_ops.gc_thresh, | 273 | .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, |
273 | .maxlen = sizeof(int), | 274 | .maxlen = sizeof(int), |
274 | .mode = 0644, | 275 | .mode = 0644, |
275 | .proc_handler = proc_dointvec, | 276 | .proc_handler = proc_dointvec, |
@@ -296,8 +297,6 @@ static void __exit xfrm4_policy_fini(void) | |||
296 | 297 | ||
297 | void __init xfrm4_init(int rt_max_size) | 298 | void __init xfrm4_init(int rt_max_size) |
298 | { | 299 | { |
299 | xfrm4_state_init(); | ||
300 | xfrm4_policy_init(); | ||
301 | /* | 300 | /* |
302 | * Select a default value for the gc_thresh based on the main route | 301 | * Select a default value for the gc_thresh based on the main route |
303 | * table hash size. It seems to me the worst case scenario is when | 302 | * table hash size. It seems to me the worst case scenario is when |
@@ -309,6 +308,9 @@ void __init xfrm4_init(int rt_max_size) | |||
309 | * and start cleaning when were 1/2 full | 308 | * and start cleaning when were 1/2 full |
310 | */ | 309 | */ |
311 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | 310 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; |
311 | |||
312 | xfrm4_state_init(); | ||
313 | xfrm4_policy_init(); | ||
312 | #ifdef CONFIG_SYSCTL | 314 | #ifdef CONFIG_SYSCTL |
313 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | 315 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, |
314 | xfrm4_policy_table); | 316 | xfrm4_policy_table); |