Diffstat (limited to 'net/ipv4')
99 files changed, 4213 insertions, 3533 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 0c94a1ac2946..7cd7760144f7 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -46,7 +46,7 @@ config IP_ADVANCED_ROUTER
 	  rp_filter on use:
 
 	  echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
-	 and
+	 or
 	  echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
 
 	  Note that some distributions enable it in startup scripts.
@@ -217,6 +217,7 @@ config NET_IPIP
 
 config NET_IPGRE
 	tristate "IP: GRE tunnels over IP"
+	depends on IPV6 || IPV6=n
 	help
 	  Tunneling means encapsulating data of one protocol type within
 	  another protocol and sending it over a channel that understands the
@@ -250,6 +251,20 @@ config IP_MROUTE
 	  <file:Documentation/networking/multicast.txt>. If you haven't heard
 	  about it, you don't need it.
 
+config IP_MROUTE_MULTIPLE_TABLES
+	bool "IP: multicast policy routing"
+	depends on IP_MROUTE && IP_ADVANCED_ROUTER
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
 config IP_PIMSM_V1
 	bool "IP: PIM-SM version 1 support"
 	depends on IP_MROUTE
@@ -289,7 +304,7 @@ config ARPD
 	  If unsure, say N.
 
 config SYN_COOKIES
-	bool "IP: TCP syncookie support (disabled per default)"
+	bool "IP: TCP syncookie support"
 	---help---
 	  Normal TCP/IP networking is open to an attack known as "SYN
 	  flooding". This denial-of-service attack prevents legitimate remote
@@ -314,13 +329,13 @@ config SYN_COOKIES
 	  server is really overloaded. If this happens frequently better turn
 	  them off.
 
-	  If you say Y here, note that SYN cookies aren't enabled by default;
-	  you can enable them by saying Y to "/proc file system support" and
+	  If you say Y here, you can disable SYN cookies at run time by
+	  saying Y to "/proc file system support" and
 	  "Sysctl support" below and executing the command
 
-	  echo 1 >/proc/sys/net/ipv4/tcp_syncookies
+	  echo 0 > /proc/sys/net/ipv4/tcp_syncookies
 
-	  at boot time after the /proc file system has been mounted.
+	  after the /proc file system has been mounted.
 
 	  If unsure, say N.
 
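This hunk flips the semantics of the build option: with SYN_COOKIES built in, cookies are now on by default and the sysctl is used to opt out rather than opt in. For an init script written in C rather than shell, the equivalent of the echo above is a minimal sketch like this (error handling kept deliberately thin):

	#include <stdio.h>

	/* Clear tcp_syncookies at boot, restoring the old opt-in behaviour;
	 * same effect as "echo 0 > /proc/sys/net/ipv4/tcp_syncookies". */
	static int disable_syncookies(void)
	{
		FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");

		if (!f)
			return -1;
		fputs("0", f);
		return fclose(f);
	}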
@@ -398,7 +413,7 @@ config INET_XFRM_MODE_BEET
 	  If unsure, say Y.
 
 config INET_LRO
-	bool "Large Receive Offload (ipv4/tcp)"
+	tristate "Large Receive Offload (ipv4/tcp)"
 	default y
 	---help---
 	  Support for Large Receive Offload (ipv4/tcp).
@@ -587,9 +602,15 @@ choice
 	config DEFAULT_HTCP
 		bool "Htcp" if TCP_CONG_HTCP=y
 
+	config DEFAULT_HYBLA
+		bool "Hybla" if TCP_CONG_HYBLA=y
+
 	config DEFAULT_VEGAS
 		bool "Vegas" if TCP_CONG_VEGAS=y
 
+	config DEFAULT_VENO
+		bool "Veno" if TCP_CONG_VENO=y
+
 	config DEFAULT_WESTWOOD
 		bool "Westwood" if TCP_CONG_WESTWOOD=y
 
@@ -610,8 +631,10 @@ config DEFAULT_TCP_CONG
 	default "bic" if DEFAULT_BIC
 	default "cubic" if DEFAULT_CUBIC
 	default "htcp" if DEFAULT_HTCP
+	default "hybla" if DEFAULT_HYBLA
 	default "vegas" if DEFAULT_VEGAS
 	default "westwood" if DEFAULT_WESTWOOD
+	default "veno" if DEFAULT_VENO
 	default "reno" if DEFAULT_RENO
 	default "cubic"
 
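DEFAULT_TCP_CONG only selects the system-wide default string; once the hybla or veno module is available, a process can still opt in per socket through the standard TCP_CONGESTION socket option. A userspace sketch (setsockopt fails with ENOENT if the named algorithm is not built or loaded):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Request the "hybla" congestion control for one TCP socket. */
	static int use_hybla(int fd)
	{
		const char name[] = "hybla";

		return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
				  name, strlen(name));
	}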
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7d12c6a9b19b..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -86,6 +86,7 @@
 #include <linux/poll.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
 	WARN_ON(sk->sk_forward_alloc);
 
 	kfree(inet->opt);
-	dst_release(sk->sk_dst_cache);
+	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
 	sk_refcnt_debug_dec(sk);
 }
 EXPORT_SYMBOL(inet_sock_destruct);
@@ -354,6 +355,8 @@ lookup_protocol:
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
+	inet->nodefrag = 0;
+
 	if (SOCK_RAW == sock->type) {
 		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
@@ -418,6 +421,8 @@ int inet_release(struct socket *sock)
 	if (sk) {
 		long timeout;
 
+		sock_rps_reset_flow(sk);
+
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
 
@@ -530,6 +535,8 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 {
 	struct sock *sk = sock->sk;
 
+	if (addr_len < sizeof(uaddr->sa_family))
+		return -EINVAL;
 	if (uaddr->sa_family == AF_UNSPEC)
 		return sk->sk_prot->disconnect(sk, flags);
 
@@ -543,7 +550,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
 {
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Basic assumption: if someone sets sk->sk_err, he _must_
 	 * change state of the socket from TCP_SYN_*.
@@ -556,9 +563,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
 		lock_sock(sk);
 		if (signal_pending(current) || !timeo)
 			break;
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeo;
 }
 
@@ -573,6 +580,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	int err;
 	long timeo;
 
+	if (addr_len < sizeof(uaddr->sa_family))
+		return -EINVAL;
+
 	lock_sock(sk);
 
 	if (uaddr->sa_family == AF_UNSPEC) {
@@ -714,29 +724,51 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 {
 	struct sock *sk = sock->sk;
 
+	sock_rps_record_flow(sk);
+
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
-			     size_t size, int flags)
+ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+		      size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
+	sock_rps_record_flow(sk);
+
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
 		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(inet_sendpage);
 
+int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		 size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	sock_rps_record_flow(sk);
+
+	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+EXPORT_SYMBOL(inet_recvmsg);
 
 int inet_shutdown(struct socket *sock, int how)
 {
@@ -865,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
-	.sendmsg	   = tcp_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.sendmsg	   = inet_sendmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -893,7 +925,7 @@ const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -923,7 +955,7 @@ static const struct proto_ops inet_sockraw_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
#ifdef CONFIG_COMPAT
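Funnelling all three proto_ops tables through inet_sendmsg/inet_sendpage/inet_recvmsg means every AF_INET socket now calls sock_rps_record_flow() on each send and receive, which feeds the Receive Flow Steering table mapping flow hashes to the CPU where the application runs. The steering itself stays inert until an administrator sizes the tables; a sketch of the standard procfs/sysfs knobs ("eth0" and the sizes are placeholders, error handling elided):

	#include <stdio.h>

	/* Size the global RFS socket-flow table and give one rx queue a
	 * per-queue flow count. */
	static void enable_rfs(void)
	{
		FILE *f;

		f = fopen("/proc/sys/net/core/rps_sock_flow_entries", "w");
		if (f) { fputs("32768", f); fclose(f); }

		f = fopen("/sys/class/net/eth0/queues/rx-0/rps_flow_cnt", "w");
		if (f) { fputs("4096", f); fclose(f); }
	}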
@@ -1073,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	if (err)
 		return err;
 
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	new_saddr = rt->rt_src;
 
@@ -1139,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
 	}
 	if (!err)
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	else {
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
@@ -1296,8 +1328,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 		goto out_unlock;
 
-	id = ntohl(*(u32 *)&iph->id);
-	flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
+	id = ntohl(*(__be32 *)&iph->id);
+	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
 	id >>= 16;
 
 	for (p = *head; p; p = p->next) {
@@ -1310,8 +1342,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 		if ((iph->protocol ^ iph2->protocol) |
 		    (iph->tos ^ iph2->tos) |
-		    (iph->saddr ^ iph2->saddr) |
-		    (iph->daddr ^ iph2->daddr)) {
+		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
+		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
@@ -1385,7 +1417,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-unsigned long snmp_fold_field(void *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 {
 	unsigned long res = 0;
 	int i;
@@ -1398,13 +1430,49 @@ unsigned long snmp_fold_field(void *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void *ptr[2], size_t mibsize)
+#if BITS_PER_LONG==32
+
+u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+{
+	u64 res = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *bhptr, *userptr;
+		struct u64_stats_sync *syncp;
+		u64 v_bh, v_user;
+		unsigned int start;
+
+		/* first mib used by softirq context, we must use _bh() accessors */
+		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin_bh(syncp);
+			v_bh = *(((u64 *) bhptr) + offt);
+		} while (u64_stats_fetch_retry_bh(syncp, start));
+
+		/* second mib used in USER context */
+		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin(syncp);
+			v_user = *(((u64 *) userptr) + offt);
+		} while (u64_stats_fetch_retry(syncp, start));
+
+		res += v_bh + v_user;
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field64);
+#endif
+
+int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
 		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+	ptr[1] = __alloc_percpu(mibsize, align);
 	if (!ptr[1])
 		goto err1;
 	return 0;
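snmp_fold_field64() is the 32-bit reader side of the u64_stats_sync scheme: on 32-bit hosts a 64-bit counter cannot be read atomically, so the reader retries whenever the writer's sequence count changed underneath it. A minimal sketch of the matching writer side (the struct and field names here are illustrative, not the kernel's mib layout):

	#include <linux/percpu.h>
	#include <linux/u64_stats_sync.h>

	/* Illustrative per-cpu counter block; "pkts" stands in for a real
	 * 64-bit mib field, "syncp" matches the syncp_offset convention above. */
	struct demo_mib {
		u64			pkts;
		struct u64_stats_sync	syncp;
	};

	static void demo_mib_add(struct demo_mib __percpu *mib, u64 n)
	{
		struct demo_mib *p = this_cpu_ptr(mib);

		u64_stats_update_begin(&p->syncp);	/* bumps a seqcount on 32-bit */
		p->pkts += n;
		u64_stats_update_end(&p->syncp);
	}

On 64-bit kernels both the begin/end and fetch/retry helpers compile away, which is why the folded reader is guarded by BITS_PER_LONG==32.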
@@ -1416,7 +1484,7 @@ err0:
 }
 EXPORT_SYMBOL_GPL(snmp_mib_init);
 
-void snmp_mib_free(void *ptr[2])
+void snmp_mib_free(void __percpu *ptr[2])
 {
 	BUG_ON(ptr == NULL);
 	free_percpu(ptr[0]);
@@ -1460,56 +1528,63 @@ static const struct net_protocol icmp_protocol = {
 
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
-	if (snmp_mib_init((void **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
-	if (snmp_mib_init((void **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp_mib_init((void **)net->mib.net_statistics,
-			  sizeof(struct linux_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
 		goto err_net_mib;
-	if (snmp_mib_init((void **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp_mib_init((void **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
-	if (snmp_mib_init((void **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
+	if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
+			  sizeof(struct icmpmsg_mib),
+			  __alignof__(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
 err_icmpmsg_mib:
-	snmp_mib_free((void **)net->mib.icmp_statistics);
+	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void **)net->mib.udplite_statistics);
+	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
 err_udplite_mib:
-	snmp_mib_free((void **)net->mib.udp_statistics);
+	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
 err_udp_mib:
-	snmp_mib_free((void **)net->mib.net_statistics);
+	snmp_mib_free((void __percpu **)net->mib.net_statistics);
 err_net_mib:
-	snmp_mib_free((void **)net->mib.ip_statistics);
+	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
 err_ip_mib:
-	snmp_mib_free((void **)net->mib.tcp_statistics);
+	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
 err_tcp_mib:
 	return -ENOMEM;
 }
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
-	snmp_mib_free((void **)net->mib.icmpmsg_statistics);
-	snmp_mib_free((void **)net->mib.icmp_statistics);
-	snmp_mib_free((void **)net->mib.udplite_statistics);
-	snmp_mib_free((void **)net->mib.udp_statistics);
-	snmp_mib_free((void **)net->mib.net_statistics);
-	snmp_mib_free((void **)net->mib.ip_statistics);
-	snmp_mib_free((void **)net->mib.tcp_statistics);
+	snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
+	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+	snmp_mib_free((void __percpu **)net->mib.net_statistics);
+	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1546,9 +1621,13 @@ static int __init inet_init(void)
 
 	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
 
+	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+	if (!sysctl_local_reserved_ports)
+		goto out;
+
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
-		goto out;
+		goto out_free_reserved_ports;
 
 	rc = proto_register(&udp_prot, 1);
 	if (rc)
@@ -1647,6 +1726,8 @@ out_unregister_udp_proto:
 	proto_unregister(&udp_prot);
 out_unregister_tcp_proto:
 	proto_unregister(&tcp_prot);
+out_free_reserved_ports:
+	kfree(sysctl_local_reserved_ports);
 	goto out;
 }
 
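The 65536/8 bytes allocated in inet_init() form a one-bit-per-port bitmap backing the ip_local_reserved_ports sysctl, so the ephemeral-port allocator can skip administratively reserved ports with a constant-time bit test. A sketch of what such a check looks like (the helper name is illustrative):

	#include <linux/bitops.h>
	#include <linux/types.h>

	extern unsigned long *sysctl_local_reserved_ports;

	/* Constant-time lookup against the 65536-bit reservation bitmap. */
	static inline bool local_port_is_reserved(u16 port)
	{
		return test_bit(port, sysctl_local_reserved_ports);
	}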
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7ed3e4ae93ae..880a5ec6dce0 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,6 +1,7 @@
 #include <crypto/hash.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ah.h>
@@ -393,7 +394,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
-	x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+	x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
 	if (!x)
 		return;
 	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c95cd93acf29..96c1955b3e2f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -70,6 +70,7 @@
  *		bonding can change the skb before
  *		sending (e.g. insert 8021q tag).
  *	Harald Welte	:	convert to make use of jenkins hash
+ *	Jesper D. Brouer:	Proxy ARP PVLAN RFC 3069 support.
  */
 
 #include <linux/module.h>
@@ -97,6 +98,7 @@
 #include <linux/net.h>
 #include <linux/rcupdate.h>
 #include <linux/jhash.h>
+#include <linux/slab.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -114,6 +116,7 @@
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 #include <net/atmclip.h>
 struct neigh_table *clip_tbl_hook;
+EXPORT_SYMBOL(clip_tbl_hook);
 #endif
 
 #include <asm/system.h>
@@ -167,6 +170,7 @@ const struct neigh_ops arp_broken_ops = {
 	.hh_output =		dev_queue_xmit,
 	.queue_xmit =		dev_queue_xmit,
 };
+EXPORT_SYMBOL(arp_broken_ops);
 
 struct neigh_table arp_tbl = {
 	.family =	AF_INET,
@@ -196,6 +200,7 @@ struct neigh_table arp_tbl = {
 	.gc_thresh2 =	512,
 	.gc_thresh3 =	1024,
 };
+EXPORT_SYMBOL(arp_tbl);
 
 int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
 {
@@ -331,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct net_device *dev = neigh->dev;
 	__be32 target = *(__be32*)neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev;
 
-	if (!in_dev)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dev);
+	if (!in_dev) {
+		rcu_read_unlock();
 		return;
-
+	}
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
@@ -356,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	case 2:		/* Avoid secondary IPs, get a primary/preferred one */
 		break;
 	}
+	rcu_read_unlock();
 
-	if (in_dev)
-		in_dev_put(in_dev);
 	if (!saddr)
 		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
 
@@ -425,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 
 	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
-	if (rt->u.dst.dev != dev) {
+	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
@@ -495,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 	kfree_skb(skb);
 	return 1;
 }
+EXPORT_SYMBOL(arp_find);
 
 /* END OF OBSOLETE FUNCTIONS */
 
@@ -524,12 +532,15 @@ int arp_bind_neighbour(struct dst_entry *dst)
 /*
  *	Check if we can use proxy ARP for this path
  */
-
-static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
+static inline int arp_fwd_proxy(struct in_device *in_dev,
+				struct net_device *dev, struct rtable *rt)
 {
 	struct in_device *out_dev;
 	int imi, omi = -1;
 
+	if (rt->dst.dev == dev)
+		return 0;
+
 	if (!IN_DEV_PROXY_ARP(in_dev))
 		return 0;
 
@@ -540,14 +551,51 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
 
 	/* place to check for proxy_arp for routes */
 
-	if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) {
+	out_dev = __in_dev_get_rcu(rt->dst.dev);
+	if (out_dev)
 		omi = IN_DEV_MEDIUM_ID(out_dev);
-		in_dev_put(out_dev);
-	}
+
 	return (omi != imi && omi != -1);
 }
 
 /*
+ *	Check for RFC3069 proxy arp private VLAN (allow to send back to same dev)
+ *
+ *	RFC3069 supports proxy arp replies back to the same interface. This
+ *	is done to support (ethernet) switch features, like RFC 3069, where
+ *	the individual ports are not allowed to communicate with each
+ *	other, BUT they are allowed to talk to the upstream router. As
+ *	described in RFC 3069, it is possible to allow these hosts to
+ *	communicate through the upstream router, by proxy_arp'ing.
+ *
+ *	RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation"
+ *
+ *	This technology is known by different names:
+ *	  In RFC 3069 it is called VLAN Aggregation.
+ *	  Cisco and Allied Telesyn call it Private VLAN.
+ *	  Hewlett-Packard call it Source-Port filtering or port-isolation.
+ *	  Ericsson call it MAC-Forced Forwarding (RFC Draft).
+ *
+ */
+static inline int arp_fwd_pvlan(struct in_device *in_dev,
+				struct net_device *dev, struct rtable *rt,
+				__be32 sip, __be32 tip)
+{
+	/* Private VLAN is only concerned about the same ethernet segment */
+	if (rt->dst.dev != dev)
+		return 0;
+
+	/* Don't reply on self probes (often done by windowz boxes)*/
+	if (sip == tip)
+		return 0;
+
+	if (IN_DEV_PROXY_ARP_PVLAN(in_dev))
+		return 1;
+	else
+		return 0;
+}
+
+/*
 *	Interface to link layer: send routine and receive handler.
 */
 
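arp_fwd_pvlan() only answers when the per-device proxy_arp_pvlan sysctl is set; the knob is registered further down this series (see the devinet.c hunk adding the "proxy_arp_pvlan" entry). Enabling it on the router's downstream interface might look like this sketch ("eth0" is a placeholder):

	#include <stdio.h>

	/* Enable RFC 3069 proxy-ARP PVLAN replies on one interface via the
	 * per-device sysctl added by this patch series. */
	static int enable_proxy_arp_pvlan(const char *dev)
	{
		char path[128];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/proc/sys/net/ipv4/conf/%s/proxy_arp_pvlan", dev);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fputs("1", f);
		return fclose(f);
	}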
@@ -619,13 +667,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 #endif
 #endif
 
-#ifdef CONFIG_FDDI
+#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
 	case ARPHRD_FDDI:
 		arp->ar_hrd = htons(ARPHRD_ETHER);
 		arp->ar_pro = htons(ETH_P_IP);
 		break;
 #endif
-#ifdef CONFIG_TR
+#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
 	case ARPHRD_IEEE802_TR:
 		arp->ar_hrd = htons(ARPHRD_IEEE802);
 		arp->ar_pro = htons(ETH_P_IP);
@@ -656,6 +704,7 @@ out:
 	kfree_skb(skb);
 	return NULL;
 }
+EXPORT_SYMBOL(arp_create);
 
 /*
  *	Send an arp packet.
@@ -665,6 +714,7 @@ void arp_xmit(struct sk_buff *skb)
 	/* Send it off, maybe filter it using firewalling first.  */
 	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
+EXPORT_SYMBOL(arp_xmit);
 
 /*
  *	Create and send an arp packet.
@@ -691,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 
 	arp_xmit(skb);
 }
+EXPORT_SYMBOL(arp_send);
 
 /*
  *	Process an arp request.
@@ -699,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 static int arp_process(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	struct rtable *rt;
@@ -812,7 +863,7 @@ static int arp_process(struct sk_buff *skb)
 	}
 
 	if (arp->ar_op == htons(ARPOP_REQUEST) &&
-	    ip_route_input(skb, tip, sip, 0, dev) == 0) {
+	    ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
 
 		rt = skb_rtable(skb);
 		addr_type = rt->rt_type;
@@ -833,8 +884,11 @@ static int arp_process(struct sk_buff *skb)
 			}
 			goto out;
 		} else if (IN_DEV_FORWARD(in_dev)) {
-			if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
-			    (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
+			if (addr_type == RTN_UNICAST &&
+			    (arp_fwd_proxy(in_dev, dev, rt) ||
+			     arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
+			     pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))
+			{
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
@@ -845,7 +899,6 @@ static int arp_process(struct sk_buff *skb)
 				arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
 			} else {
 				pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
-				in_dev_put(in_dev);
 				return 0;
 			}
 			goto out;
@@ -863,7 +916,8 @@ static int arp_process(struct sk_buff *skb)
 		   devices (strip is candidate)
 		 */
 		if (n == NULL &&
-		    arp->ar_op == htons(ARPOP_REPLY) &&
+		    (arp->ar_op == htons(ARPOP_REPLY) ||
+		     (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) &&
 		    inet_addr_type(net, sip) == RTN_UNICAST)
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
@@ -890,8 +944,6 @@ static int arp_process(struct sk_buff *skb)
 	}
 
 out:
-	if (in_dev)
-		in_dev_put(in_dev);
 	consume_skb(skb);
 	return 0;
 }
@@ -999,13 +1051,13 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
 	}
 	switch (dev->type) {
-#ifdef CONFIG_FDDI
+#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
 	case ARPHRD_FDDI:
 		/*
 		 * According to RFC 1390, FDDI devices should accept ARP
@@ -1106,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
@@ -1239,8 +1291,7 @@ void __init arp_init(void)
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
 #ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
-			      NET_IPV4_NEIGH, "ipv4", NULL);
+	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
 #endif
 	register_netdevice_notifier(&arp_netdev_notifier);
 }
@@ -1408,14 +1459,3 @@ static int __init arp_proc_init(void)
 }
 
 #endif /* CONFIG_PROC_FS */
-
-EXPORT_SYMBOL(arp_broken_ops);
-EXPORT_SYMBOL(arp_find);
-EXPORT_SYMBOL(arp_create);
-EXPORT_SYMBOL(arp_xmit);
-EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
-
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-EXPORT_SYMBOL(clip_tbl_hook);
-#endif
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 1e029dc75455..3a92a76ae41d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -44,6 +44,7 @@
 #include <linux/string.h>
 #include <linux/jhash.h>
 #include <linux/audit.h>
+#include <linux/slab.h>
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -289,8 +290,6 @@ void cipso_v4_cache_invalidate(void)
 		cipso_v4_cache[iter].size = 0;
 		spin_unlock_bh(&cipso_v4_cache[iter].lock);
 	}
-
-	return;
 }
 
 /**
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..721a8a37b45c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -62,16 +62,17 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 	if (!inet->inet_saddr)
 		inet->inet_saddr = rt->rt_src;	/* Update source address */
-	if (!inet->inet_rcv_saddr)
+	if (!inet->inet_rcv_saddr) {
 		inet->inet_rcv_saddr = rt->rt_src;
+		if (sk->sk_prot->rehash)
+			sk->sk_prot->rehash(sk);
+	}
 	inet->inet_daddr = rt->rt_dst;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 	return(0);
 }
-
 EXPORT_SYMBOL(ip4_datagram_connect);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 040c4f05b653..da14c49284f4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -50,6 +50,7 @@
 #include <linux/notifier.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
+#include <linux/slab.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -64,20 +65,20 @@
 
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
-		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
+		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
 	},
 };
 
 static struct ipv4_devconf ipv4_devconf_dflt = {
 	.data = {
-		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
-		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
+		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
+		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
 	},
 };
 
@@ -1080,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		}
 		ip_mc_up(in_dev);
 		/* fall through */
+	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_CHANGEADDR:
 		/* Send gratuitous ARP to notify of link change */
 		if (IN_DEV_ARP_NOTIFY(in_dev)) {
@@ -1095,10 +1097,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	case NETDEV_DOWN:
 		ip_mc_down(in_dev);
 		break;
-	case NETDEV_BONDING_OLDTYPE:
+	case NETDEV_PRE_TYPE_CHANGE:
 		ip_mc_unmap(in_dev);
 		break;
-	case NETDEV_BONDING_NEWTYPE:
+	case NETDEV_POST_TYPE_CHANGE:
 		ip_mc_remap(in_dev);
 		break;
 	case NETDEV_CHANGEMTU:
@@ -1194,7 +1196,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
-			if (idx > s_idx)
+			if (h > s_h || idx > s_idx)
 				s_ip_idx = 0;
 			in_dev = __in_dev_get_rcu(dev);
 			if (!in_dev)
@@ -1317,14 +1319,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 {
 	int *valp = ctl->data;
 	int val = *valp;
+	loff_t pos = *ppos;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
 		struct net *net = ctl->extra2;
 
 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
-			if (!rtnl_trylock())
+			if (!rtnl_trylock()) {
+				/* Restore the original values before restarting */
+				*valp = val;
+				*ppos = pos;
 				return restart_syscall();
+			}
 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
 				inet_forward_change(net);
 			} else if (*valp) {
@@ -1360,7 +1367,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	{ \
 		.procname	= name, \
 		.data		= ipv4_devconf.data + \
-				  NET_IPV4_CONF_ ## attr - 1, \
+				  IPV4_DEVCONF_ ## attr - 1, \
 		.maxlen		= sizeof(int), \
 		.mode		= mval, \
 		.proc_handler	= proc, \
@@ -1381,7 +1388,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
+	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
 	char *dev_name;
 } devinet_sysctl = {
 	.devinet_vars = {
@@ -1408,6 +1415,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
 
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@ -1486,8 +1494,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 
 static void devinet_sysctl_register(struct in_device *idev)
 {
-	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
-			      NET_IPV4_NEIGH, "ipv4", NULL);
+	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
 				  &idev->cnf);
 }
@@ -1502,7 +1509,7 @@ static struct ctl_table ctl_forward_entry[] = {
 	{
 		.procname	= "ip_forward",
 		.data		= &ipv4_devconf.data[
-					NET_IPV4_CONF_FORWARDING - 1],
+					IPV4_DEVCONF_FORWARDING - 1],
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= devinet_sysctl_forward,
@@ -1546,7 +1553,7 @@ static __net_init int devinet_init_net(struct net *net)
 	if (tbl == NULL)
 		goto err_alloc_ctl;
 
-	tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
+	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
 	tbl[0].extra1 = all;
 	tbl[0].extra2 = net;
 #endif
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1948895beb6d..14ca1f1c3fb0 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
-	x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+	x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
 	if (!x)
 		return;
 	NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 82dbf711d6d0..7d02a9f999fa 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include <linux/skbuff.h> | 34 | #include <linux/skbuff.h> |
| 35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 36 | #include <linux/list.h> | 36 | #include <linux/list.h> |
| 37 | #include <linux/slab.h> | ||
| 37 | 38 | ||
| 38 | #include <net/ip.h> | 39 | #include <net/ip.h> |
| 39 | #include <net/protocol.h> | 40 | #include <net/protocol.h> |
| @@ -174,6 +175,7 @@ out: | |||
| 174 | fib_res_put(&res); | 175 | fib_res_put(&res); |
| 175 | return dev; | 176 | return dev; |
| 176 | } | 177 | } |
| 178 | EXPORT_SYMBOL(ip_dev_find); | ||
| 177 | 179 | ||
| 178 | /* | 180 | /* |
| 179 | * Find address type as if only "dev" was present in the system. If | 181 | * Find address type as if only "dev" was present in the system. If |
| @@ -213,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr) | |||
| 213 | { | 215 | { |
| 214 | return __inet_dev_addr_type(net, NULL, addr); | 216 | return __inet_dev_addr_type(net, NULL, addr); |
| 215 | } | 217 | } |
| 218 | EXPORT_SYMBOL(inet_addr_type); | ||
| 216 | 219 | ||
| 217 | unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, | 220 | unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, |
| 218 | __be32 addr) | 221 | __be32 addr) |
| 219 | { | 222 | { |
| 220 | return __inet_dev_addr_type(net, dev, addr); | 223 | return __inet_dev_addr_type(net, dev, addr); |
| 221 | } | 224 | } |
| 225 | EXPORT_SYMBOL(inet_dev_addr_type); | ||
| 222 | 226 | ||
| 223 | /* Given (packet source, input interface) and optional (dst, oif, tos): | 227 | /* Given (packet source, input interface) and optional (dst, oif, tos): |
| 224 | - (main) check, that source is valid i.e. not broadcast or our local | 228 | - (main) check, that source is valid i.e. not broadcast or our local |
| @@ -242,6 +246,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 242 | 246 | ||
| 243 | struct fib_result res; | 247 | struct fib_result res; |
| 244 | int no_addr, rpf, accept_local; | 248 | int no_addr, rpf, accept_local; |
| 249 | bool dev_match; | ||
| 245 | int ret; | 250 | int ret; |
| 246 | struct net *net; | 251 | struct net *net; |
| 247 | 252 | ||
| @@ -269,12 +274,22 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 269 | } | 274 | } |
| 270 | *spec_dst = FIB_RES_PREFSRC(res); | 275 | *spec_dst = FIB_RES_PREFSRC(res); |
| 271 | fib_combine_itag(itag, &res); | 276 | fib_combine_itag(itag, &res); |
| 277 | dev_match = false; | ||
| 278 | |||
| 272 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 279 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 273 | if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) | 280 | for (ret = 0; ret < res.fi->fib_nhs; ret++) { |
| 281 | struct fib_nh *nh = &res.fi->fib_nh[ret]; | ||
| 282 | |||
| 283 | if (nh->nh_dev == dev) { | ||
| 284 | dev_match = true; | ||
| 285 | break; | ||
| 286 | } | ||
| 287 | } | ||
| 274 | #else | 288 | #else |
| 275 | if (FIB_RES_DEV(res) == dev) | 289 | if (FIB_RES_DEV(res) == dev) |
| 290 | dev_match = true; | ||
| 276 | #endif | 291 | #endif |
| 277 | { | 292 | if (dev_match) { |
| 278 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 293 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
| 279 | fib_res_put(&res); | 294 | fib_res_put(&res); |
| 280 | return ret; | 295 | return ret; |
| @@ -283,7 +298,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 283 | if (no_addr) | 298 | if (no_addr) |
| 284 | goto last_resort; | 299 | goto last_resort; |
| 285 | if (rpf == 1) | 300 | if (rpf == 1) |
| 286 | goto e_inval; | 301 | goto e_rpf; |
| 287 | fl.oif = dev->ifindex; | 302 | fl.oif = dev->ifindex; |
| 288 | 303 | ||
| 289 | ret = 0; | 304 | ret = 0; |
| @@ -298,7 +313,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 298 | 313 | ||
| 299 | last_resort: | 314 | last_resort: |
| 300 | if (rpf) | 315 | if (rpf) |
| 301 | goto e_inval; | 316 | goto e_rpf; |
| 302 | *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | 317 | *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); |
| 303 | *itag = 0; | 318 | *itag = 0; |
| 304 | return 0; | 319 | return 0; |
| @@ -307,6 +322,8 @@ e_inval_res: | |||
| 307 | fib_res_put(&res); | 322 | fib_res_put(&res); |
| 308 | e_inval: | 323 | e_inval: |
| 309 | return -EINVAL; | 324 | return -EINVAL; |
| 325 | e_rpf: | ||
| 326 | return -EXDEV; | ||
| 310 | } | 327 | } |
| 311 | 328 | ||
| 312 | static inline __be32 sk_extract_addr(struct sockaddr *addr) | 329 | static inline __be32 sk_extract_addr(struct sockaddr *addr) |
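
With multipath routes, the reverse-path check above no longer accepts any route that merely has more than one nexthop; it walks the nexthops and requires one whose device really is the ingress device. A minimal sketch of that loop, with devices modeled as plain ifindexes:

    #include <stdbool.h>
    #include <stdio.h>

    struct fib_nh { int nh_dev; };      /* device modeled as an ifindex */

    int main(void)
    {
            struct fib_nh nh[] = { { 2 }, { 3 } }; /* two nexthop devices */
            int dev = 3;                           /* ingress ifindex */
            bool dev_match = false;
            int i;

            /* the patched loop: only a nexthop that actually uses the
             * ingress device satisfies the reverse-path check */
            for (i = 0; i < 2; i++)
                    if (nh[i].nh_dev == dev) {
                            dev_match = true;
                            break;
                    }
            printf("dev_match=%d\n", dev_match);
            return 0;
    }
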
| @@ -883,7 +900,7 @@ static void nl_fib_input(struct sk_buff *skb) | |||
| 883 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); | 900 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); |
| 884 | } | 901 | } |
| 885 | 902 | ||
| 886 | static int nl_fib_lookup_init(struct net *net) | 903 | static int __net_init nl_fib_lookup_init(struct net *net) |
| 887 | { | 904 | { |
| 888 | struct sock *sk; | 905 | struct sock *sk; |
| 889 | sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, | 906 | sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, |
| @@ -1004,7 +1021,7 @@ fail: | |||
| 1004 | return err; | 1021 | return err; |
| 1005 | } | 1022 | } |
| 1006 | 1023 | ||
| 1007 | static void __net_exit ip_fib_net_exit(struct net *net) | 1024 | static void ip_fib_net_exit(struct net *net) |
| 1008 | { | 1025 | { |
| 1009 | unsigned int i; | 1026 | unsigned int i; |
| 1010 | 1027 | ||
| @@ -1074,7 +1091,3 @@ void __init ip_fib_init(void) | |||
| 1074 | 1091 | ||
| 1075 | fib_hash_init(); | 1092 | fib_hash_init(); |
| 1076 | } | 1093 | } |
| 1077 | |||
| 1078 | EXPORT_SYMBOL(inet_addr_type); | ||
| 1079 | EXPORT_SYMBOL(inet_dev_addr_type); | ||
| 1080 | EXPORT_SYMBOL(ip_dev_find); | ||
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 14972017b9c2..4ed7e0dea1bc 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/skbuff.h> | 32 | #include <linux/skbuff.h> |
| 33 | #include <linux/netlink.h> | 33 | #include <linux/netlink.h> |
| 34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
| 35 | #include <linux/slab.h> | ||
| 35 | 36 | ||
| 36 | #include <net/net_namespace.h> | 37 | #include <net/net_namespace.h> |
| 37 | #include <net/ip.h> | 38 | #include <net/ip.h> |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ca2d07b1c706..76daeb5ff564 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
| @@ -213,7 +213,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
| 213 | { | 213 | { |
| 214 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; | 214 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; |
| 215 | 215 | ||
| 216 | frh->family = AF_INET; | ||
| 217 | frh->dst_len = rule4->dst_len; | 216 | frh->dst_len = rule4->dst_len; |
| 218 | frh->src_len = rule4->src_len; | 217 | frh->src_len = rule4->src_len; |
| 219 | frh->tos = rule4->tos; | 218 | frh->tos = rule4->tos; |
| @@ -234,23 +233,6 @@ nla_put_failure: | |||
| 234 | return -ENOBUFS; | 233 | return -ENOBUFS; |
| 235 | } | 234 | } |
| 236 | 235 | ||
| 237 | static u32 fib4_rule_default_pref(struct fib_rules_ops *ops) | ||
| 238 | { | ||
| 239 | struct list_head *pos; | ||
| 240 | struct fib_rule *rule; | ||
| 241 | |||
| 242 | if (!list_empty(&ops->rules_list)) { | ||
| 243 | pos = ops->rules_list.next; | ||
| 244 | if (pos->next != &ops->rules_list) { | ||
| 245 | rule = list_entry(pos->next, struct fib_rule, list); | ||
| 246 | if (rule->pref) | ||
| 247 | return rule->pref - 1; | ||
| 248 | } | ||
| 249 | } | ||
| 250 | |||
| 251 | return 0; | ||
| 252 | } | ||
| 253 | |||
| 254 | static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) | 236 | static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) |
| 255 | { | 237 | { |
| 256 | return nla_total_size(4) /* dst */ | 238 | return nla_total_size(4) /* dst */ |
| @@ -263,7 +245,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) | |||
| 263 | rt_cache_flush(ops->fro_net, -1); | 245 | rt_cache_flush(ops->fro_net, -1); |
| 264 | } | 246 | } |
| 265 | 247 | ||
| 266 | static struct fib_rules_ops fib4_rules_ops_template = { | 248 | static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { |
| 267 | .family = AF_INET, | 249 | .family = AF_INET, |
| 268 | .rule_size = sizeof(struct fib4_rule), | 250 | .rule_size = sizeof(struct fib4_rule), |
| 269 | .addr_size = sizeof(u32), | 251 | .addr_size = sizeof(u32), |
| @@ -272,7 +254,7 @@ static struct fib_rules_ops fib4_rules_ops_template = { | |||
| 272 | .configure = fib4_rule_configure, | 254 | .configure = fib4_rule_configure, |
| 273 | .compare = fib4_rule_compare, | 255 | .compare = fib4_rule_compare, |
| 274 | .fill = fib4_rule_fill, | 256 | .fill = fib4_rule_fill, |
| 275 | .default_pref = fib4_rule_default_pref, | 257 | .default_pref = fib_default_rule_pref, |
| 276 | .nlmsg_payload = fib4_rule_nlmsg_payload, | 258 | .nlmsg_payload = fib4_rule_nlmsg_payload, |
| 277 | .flush_cache = fib4_rule_flush_cache, | 259 | .flush_cache = fib4_rule_flush_cache, |
| 278 | .nlgroup = RTNLGRP_IPV4_RULE, | 260 | .nlgroup = RTNLGRP_IPV4_RULE, |
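
The deleted fib4_rule_default_pref() picked a preference just below that of the second rule in the list, falling back to 0; the shared fib_default_rule_pref() it is replaced with is assumed here to keep the same scheme for all families. A userspace sketch of that scheme over a plain array, with index 0 standing in for the list's first node:

    #include <stdio.h>

    /* sketch of the removed helper: if a second rule exists and has a
     * nonzero preference, slot the new rule one below it, else use 0 */
    static unsigned int default_pref(const unsigned int *pref, int n)
    {
            if (n >= 2 && pref[1])
                    return pref[1] - 1;
            return 0;
    }

    int main(void)
    {
            unsigned int prefs[] = { 0, 100, 200 };

            printf("new default pref: %u\n", default_pref(prefs, 3)); /* 99 */
            return 0;
    }
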
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ed19aa6919c2..20f09c5b31e8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/proc_fs.h> | 32 | #include <linux/proc_fs.h> |
| 33 | #include <linux/skbuff.h> | 33 | #include <linux/skbuff.h> |
| 34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
| 35 | #include <linux/slab.h> | ||
| 35 | 36 | ||
| 36 | #include <net/arp.h> | 37 | #include <net/arp.h> |
| 37 | #include <net/ip.h> | 38 | #include <net/ip.h> |
| @@ -62,8 +63,8 @@ static DEFINE_SPINLOCK(fib_multipath_lock); | |||
| 62 | #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ | 63 | #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ |
| 63 | for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) | 64 | for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) |
| 64 | 65 | ||
| 65 | #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ | 66 | #define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ |
| 66 | for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) | 67 | for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) |
| 67 | 68 | ||
| 68 | #else /* CONFIG_IP_ROUTE_MULTIPATH */ | 69 | #else /* CONFIG_IP_ROUTE_MULTIPATH */ |
| 69 | 70 | ||
| @@ -72,7 +73,7 @@ for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, | |||
| 72 | #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ | 73 | #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ |
| 73 | for (nhsel=0; nhsel < 1; nhsel++) | 74 | for (nhsel=0; nhsel < 1; nhsel++) |
| 74 | 75 | ||
| 75 | #define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ | 76 | #define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ |
| 76 | for (nhsel=0; nhsel < 1; nhsel++) | 77 | for (nhsel=0; nhsel < 1; nhsel++) |
| 77 | 78 | ||
| 78 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ | 79 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ |
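
The iterator declared inside change_nexthops() is renamed from nh to nexthop_nh so it cannot shadow an nh already in scope at the call site (fib_check_nh(), for one, takes an nh parameter). A toy demonstration of the shadowing hazard the rename avoids:

    #include <stdio.h>

    int main(void)
    {
            int nh = 42;    /* an outer 'nh', as fib_check_nh() has */

            {
                    /* with the old macro the hidden iterator would also be
                     * called 'nh' and shadow the outer variable; a distinct
                     * name keeps both usable and quiets sparse */
                    int nexthop_nh = 7;

                    printf("outer nh=%d, iterator=%d\n", nh, nexthop_nh);
            }
            return 0;
    }
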
| @@ -145,9 +146,9 @@ void free_fib_info(struct fib_info *fi) | |||
| 145 | return; | 146 | return; |
| 146 | } | 147 | } |
| 147 | change_nexthops(fi) { | 148 | change_nexthops(fi) { |
| 148 | if (nh->nh_dev) | 149 | if (nexthop_nh->nh_dev) |
| 149 | dev_put(nh->nh_dev); | 150 | dev_put(nexthop_nh->nh_dev); |
| 150 | nh->nh_dev = NULL; | 151 | nexthop_nh->nh_dev = NULL; |
| 151 | } endfor_nexthops(fi); | 152 | } endfor_nexthops(fi); |
| 152 | fib_info_cnt--; | 153 | fib_info_cnt--; |
| 153 | release_net(fi->fib_net); | 154 | release_net(fi->fib_net); |
| @@ -162,9 +163,9 @@ void fib_release_info(struct fib_info *fi) | |||
| 162 | if (fi->fib_prefsrc) | 163 | if (fi->fib_prefsrc) |
| 163 | hlist_del(&fi->fib_lhash); | 164 | hlist_del(&fi->fib_lhash); |
| 164 | change_nexthops(fi) { | 165 | change_nexthops(fi) { |
| 165 | if (!nh->nh_dev) | 166 | if (!nexthop_nh->nh_dev) |
| 166 | continue; | 167 | continue; |
| 167 | hlist_del(&nh->nh_hash); | 168 | hlist_del(&nexthop_nh->nh_hash); |
| 168 | } endfor_nexthops(fi) | 169 | } endfor_nexthops(fi) |
| 169 | fi->fib_dead = 1; | 170 | fi->fib_dead = 1; |
| 170 | fib_info_put(fi); | 171 | fib_info_put(fi); |
| @@ -395,19 +396,20 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
| 395 | if (!rtnh_ok(rtnh, remaining)) | 396 | if (!rtnh_ok(rtnh, remaining)) |
| 396 | return -EINVAL; | 397 | return -EINVAL; |
| 397 | 398 | ||
| 398 | nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; | 399 | nexthop_nh->nh_flags = |
| 399 | nh->nh_oif = rtnh->rtnh_ifindex; | 400 | (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; |
| 400 | nh->nh_weight = rtnh->rtnh_hops + 1; | 401 | nexthop_nh->nh_oif = rtnh->rtnh_ifindex; |
| 402 | nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; | ||
| 401 | 403 | ||
| 402 | attrlen = rtnh_attrlen(rtnh); | 404 | attrlen = rtnh_attrlen(rtnh); |
| 403 | if (attrlen > 0) { | 405 | if (attrlen > 0) { |
| 404 | struct nlattr *nla, *attrs = rtnh_attrs(rtnh); | 406 | struct nlattr *nla, *attrs = rtnh_attrs(rtnh); |
| 405 | 407 | ||
| 406 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 408 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
| 407 | nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 409 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
| 408 | #ifdef CONFIG_NET_CLS_ROUTE | 410 | #ifdef CONFIG_NET_CLS_ROUTE |
| 409 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 411 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
| 410 | nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 412 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
| 411 | #endif | 413 | #endif |
| 412 | } | 414 | } |
| 413 | 415 | ||
| @@ -527,10 +529,6 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
| 527 | if (nh->nh_gw) { | 529 | if (nh->nh_gw) { |
| 528 | struct fib_result res; | 530 | struct fib_result res; |
| 529 | 531 | ||
| 530 | #ifdef CONFIG_IP_ROUTE_PERVASIVE | ||
| 531 | if (nh->nh_flags&RTNH_F_PERVASIVE) | ||
| 532 | return 0; | ||
| 533 | #endif | ||
| 534 | if (nh->nh_flags&RTNH_F_ONLINK) { | 532 | if (nh->nh_flags&RTNH_F_ONLINK) { |
| 535 | struct net_device *dev; | 533 | struct net_device *dev; |
| 536 | 534 | ||
| @@ -738,7 +736,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 738 | 736 | ||
| 739 | fi->fib_nhs = nhs; | 737 | fi->fib_nhs = nhs; |
| 740 | change_nexthops(fi) { | 738 | change_nexthops(fi) { |
| 741 | nh->nh_parent = fi; | 739 | nexthop_nh->nh_parent = fi; |
| 742 | } endfor_nexthops(fi) | 740 | } endfor_nexthops(fi) |
| 743 | 741 | ||
| 744 | if (cfg->fc_mx) { | 742 | if (cfg->fc_mx) { |
| @@ -808,7 +806,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 808 | goto failure; | 806 | goto failure; |
| 809 | } else { | 807 | } else { |
| 810 | change_nexthops(fi) { | 808 | change_nexthops(fi) { |
| 811 | if ((err = fib_check_nh(cfg, fi, nh)) != 0) | 809 | if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) |
| 812 | goto failure; | 810 | goto failure; |
| 813 | } endfor_nexthops(fi) | 811 | } endfor_nexthops(fi) |
| 814 | } | 812 | } |
| @@ -843,11 +841,11 @@ link_it: | |||
| 843 | struct hlist_head *head; | 841 | struct hlist_head *head; |
| 844 | unsigned int hash; | 842 | unsigned int hash; |
| 845 | 843 | ||
| 846 | if (!nh->nh_dev) | 844 | if (!nexthop_nh->nh_dev) |
| 847 | continue; | 845 | continue; |
| 848 | hash = fib_devindex_hashfn(nh->nh_dev->ifindex); | 846 | hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); |
| 849 | head = &fib_info_devhash[hash]; | 847 | head = &fib_info_devhash[hash]; |
| 850 | hlist_add_head(&nh->nh_hash, head); | 848 | hlist_add_head(&nexthop_nh->nh_hash, head); |
| 851 | } endfor_nexthops(fi) | 849 | } endfor_nexthops(fi) |
| 852 | spin_unlock_bh(&fib_info_lock); | 850 | spin_unlock_bh(&fib_info_lock); |
| 853 | return fi; | 851 | return fi; |
| @@ -1080,21 +1078,21 @@ int fib_sync_down_dev(struct net_device *dev, int force) | |||
| 1080 | prev_fi = fi; | 1078 | prev_fi = fi; |
| 1081 | dead = 0; | 1079 | dead = 0; |
| 1082 | change_nexthops(fi) { | 1080 | change_nexthops(fi) { |
| 1083 | if (nh->nh_flags&RTNH_F_DEAD) | 1081 | if (nexthop_nh->nh_flags&RTNH_F_DEAD) |
| 1084 | dead++; | 1082 | dead++; |
| 1085 | else if (nh->nh_dev == dev && | 1083 | else if (nexthop_nh->nh_dev == dev && |
| 1086 | nh->nh_scope != scope) { | 1084 | nexthop_nh->nh_scope != scope) { |
| 1087 | nh->nh_flags |= RTNH_F_DEAD; | 1085 | nexthop_nh->nh_flags |= RTNH_F_DEAD; |
| 1088 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1086 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 1089 | spin_lock_bh(&fib_multipath_lock); | 1087 | spin_lock_bh(&fib_multipath_lock); |
| 1090 | fi->fib_power -= nh->nh_power; | 1088 | fi->fib_power -= nexthop_nh->nh_power; |
| 1091 | nh->nh_power = 0; | 1089 | nexthop_nh->nh_power = 0; |
| 1092 | spin_unlock_bh(&fib_multipath_lock); | 1090 | spin_unlock_bh(&fib_multipath_lock); |
| 1093 | #endif | 1091 | #endif |
| 1094 | dead++; | 1092 | dead++; |
| 1095 | } | 1093 | } |
| 1096 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1094 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 1097 | if (force > 1 && nh->nh_dev == dev) { | 1095 | if (force > 1 && nexthop_nh->nh_dev == dev) { |
| 1098 | dead = fi->fib_nhs; | 1096 | dead = fi->fib_nhs; |
| 1099 | break; | 1097 | break; |
| 1100 | } | 1098 | } |
| @@ -1144,18 +1142,20 @@ int fib_sync_up(struct net_device *dev) | |||
| 1144 | prev_fi = fi; | 1142 | prev_fi = fi; |
| 1145 | alive = 0; | 1143 | alive = 0; |
| 1146 | change_nexthops(fi) { | 1144 | change_nexthops(fi) { |
| 1147 | if (!(nh->nh_flags&RTNH_F_DEAD)) { | 1145 | if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { |
| 1148 | alive++; | 1146 | alive++; |
| 1149 | continue; | 1147 | continue; |
| 1150 | } | 1148 | } |
| 1151 | if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) | 1149 | if (nexthop_nh->nh_dev == NULL || |
| 1150 | !(nexthop_nh->nh_dev->flags&IFF_UP)) | ||
| 1152 | continue; | 1151 | continue; |
| 1153 | if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) | 1152 | if (nexthop_nh->nh_dev != dev || |
| 1153 | !__in_dev_get_rtnl(dev)) | ||
| 1154 | continue; | 1154 | continue; |
| 1155 | alive++; | 1155 | alive++; |
| 1156 | spin_lock_bh(&fib_multipath_lock); | 1156 | spin_lock_bh(&fib_multipath_lock); |
| 1157 | nh->nh_power = 0; | 1157 | nexthop_nh->nh_power = 0; |
| 1158 | nh->nh_flags &= ~RTNH_F_DEAD; | 1158 | nexthop_nh->nh_flags &= ~RTNH_F_DEAD; |
| 1159 | spin_unlock_bh(&fib_multipath_lock); | 1159 | spin_unlock_bh(&fib_multipath_lock); |
| 1160 | } endfor_nexthops(fi) | 1160 | } endfor_nexthops(fi) |
| 1161 | 1161 | ||
| @@ -1182,9 +1182,9 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | |||
| 1182 | if (fi->fib_power <= 0) { | 1182 | if (fi->fib_power <= 0) { |
| 1183 | int power = 0; | 1183 | int power = 0; |
| 1184 | change_nexthops(fi) { | 1184 | change_nexthops(fi) { |
| 1185 | if (!(nh->nh_flags&RTNH_F_DEAD)) { | 1185 | if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { |
| 1186 | power += nh->nh_weight; | 1186 | power += nexthop_nh->nh_weight; |
| 1187 | nh->nh_power = nh->nh_weight; | 1187 | nexthop_nh->nh_power = nexthop_nh->nh_weight; |
| 1188 | } | 1188 | } |
| 1189 | } endfor_nexthops(fi); | 1189 | } endfor_nexthops(fi); |
| 1190 | fi->fib_power = power; | 1190 | fi->fib_power = power; |
| @@ -1204,9 +1204,10 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | |||
| 1204 | w = jiffies % fi->fib_power; | 1204 | w = jiffies % fi->fib_power; |
| 1205 | 1205 | ||
| 1206 | change_nexthops(fi) { | 1206 | change_nexthops(fi) { |
| 1207 | if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { | 1207 | if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && |
| 1208 | if ((w -= nh->nh_power) <= 0) { | 1208 | nexthop_nh->nh_power) { |
| 1209 | nh->nh_power--; | 1209 | if ((w -= nexthop_nh->nh_power) <= 0) { |
| 1210 | nexthop_nh->nh_power--; | ||
| 1210 | fi->fib_power--; | 1211 | fi->fib_power--; |
| 1211 | res->nh_sel = nhsel; | 1212 | res->nh_sel = nhsel; |
| 1212 | spin_unlock_bh(&fib_multipath_lock); | 1213 | spin_unlock_bh(&fib_multipath_lock); |
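
fib_select_multipath(), visible in the last two hunks, is a weighted round robin: once fi->fib_power is exhausted every live nexthop is recharged to its configured weight, a token w = jiffies % fib_power is drawn, and the nexthop whose running power drives w to zero or below is selected and pays one token. A self-contained userspace sketch, with rand() standing in for jiffies:

    #include <stdio.h>
    #include <stdlib.h>

    struct nh { int weight, power; };

    /* recharge every live nexthop to its weight and return the total,
     * as the kernel does once fi->fib_power reaches zero */
    static int recharge(struct nh *nh, int n)
    {
            int power = 0, i;

            for (i = 0; i < n; i++) {
                    nh[i].power = nh[i].weight;
                    power += nh[i].weight;
            }
            return power;
    }

    int main(void)
    {
            struct nh nh[2] = { { 3, 0 }, { 1, 0 } };   /* 3:1 weighting */
            int power = 0, hits[2] = { 0, 0 }, pkt, i;

            for (pkt = 0; pkt < 4000; pkt++) {
                    if (power <= 0)
                            power = recharge(nh, 2);

                    int w = rand() % power; /* token draw */

                    for (i = 0; i < 2; i++) {
                            if (nh[i].power && (w -= nh[i].power) <= 0) {
                                    nh[i].power--;  /* selected path pays */
                                    power--;
                                    hits[i]++;
                                    break;
                            }
                    }
            }
            printf("nh0=%d nh1=%d (expect roughly 3:1)\n", hits[0], hits[1]);
            return 0;
    }
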
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index af5d89792860..4a8e370862bc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
| @@ -71,6 +71,7 @@ | |||
| 71 | #include <linux/netlink.h> | 71 | #include <linux/netlink.h> |
| 72 | #include <linux/init.h> | 72 | #include <linux/init.h> |
| 73 | #include <linux/list.h> | 73 | #include <linux/list.h> |
| 74 | #include <linux/slab.h> | ||
| 74 | #include <net/net_namespace.h> | 75 | #include <net/net_namespace.h> |
| 75 | #include <net/ip.h> | 76 | #include <net/ip.h> |
| 76 | #include <net/protocol.h> | 77 | #include <net/protocol.h> |
| @@ -185,7 +186,9 @@ static inline struct tnode *node_parent_rcu(struct node *node) | |||
| 185 | { | 186 | { |
| 186 | struct tnode *ret = node_parent(node); | 187 | struct tnode *ret = node_parent(node); |
| 187 | 188 | ||
| 188 | return rcu_dereference(ret); | 189 | return rcu_dereference_check(ret, |
| 190 | rcu_read_lock_held() || | ||
| 191 | lockdep_rtnl_is_held()); | ||
| 189 | } | 192 | } |
| 190 | 193 | ||
| 191 | /* Same as rcu_assign_pointer | 194 | /* Same as rcu_assign_pointer |
| @@ -208,7 +211,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | |||
| 208 | { | 211 | { |
| 209 | struct node *ret = tnode_get_child(tn, i); | 212 | struct node *ret = tnode_get_child(tn, i); |
| 210 | 213 | ||
| 211 | return rcu_dereference(ret); | 214 | return rcu_dereference_check(ret, |
| 215 | rcu_read_lock_held() || | ||
| 216 | lockdep_rtnl_is_held()); | ||
| 212 | } | 217 | } |
| 213 | 218 | ||
| 214 | static inline int tnode_child_length(const struct tnode *tn) | 219 | static inline int tnode_child_length(const struct tnode *tn) |
| @@ -961,7 +966,9 @@ fib_find_node(struct trie *t, u32 key) | |||
| 961 | struct node *n; | 966 | struct node *n; |
| 962 | 967 | ||
| 963 | pos = 0; | 968 | pos = 0; |
| 964 | n = rcu_dereference(t->trie); | 969 | n = rcu_dereference_check(t->trie, |
| 970 | rcu_read_lock_held() || | ||
| 971 | lockdep_rtnl_is_held()); | ||
| 965 | 972 | ||
| 966 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 973 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
| 967 | tn = (struct tnode *) n; | 974 | tn = (struct tnode *) n; |
| @@ -1017,8 +1024,6 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
| 1017 | 1024 | ||
| 1018 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1025 | rcu_assign_pointer(t->trie, (struct node *)tn); |
| 1019 | tnode_free_flush(); | 1026 | tnode_free_flush(); |
| 1020 | |||
| 1021 | return; | ||
| 1022 | } | 1027 | } |
| 1023 | 1028 | ||
| 1024 | /* only used from updater-side */ | 1029 | /* only used from updater-side */ |
| @@ -1750,7 +1755,9 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | |||
| 1750 | 1755 | ||
| 1751 | static struct leaf *trie_firstleaf(struct trie *t) | 1756 | static struct leaf *trie_firstleaf(struct trie *t) |
| 1752 | { | 1757 | { |
| 1753 | struct tnode *n = (struct tnode *) rcu_dereference(t->trie); | 1758 | struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, |
| 1759 | rcu_read_lock_held() || | ||
| 1760 | lockdep_rtnl_is_held()); | ||
| 1754 | 1761 | ||
| 1755 | if (!n) | 1762 | if (!n) |
| 1756 | return NULL; | 1763 | return NULL; |
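
The trie readers converted above may run either inside rcu_read_lock() or from the updater side under the RTNL, so a plain rcu_dereference() would trip lockdep's RCU checks; rcu_dereference_check() records both legal contexts. A kernel-style sketch of the pattern (illustrative names, not a standalone program):

    #include <linux/rcupdate.h>
    #include <linux/rtnetlink.h>

    struct cfg { int value; };
    static struct cfg *active_cfg;  /* updated via rcu_assign_pointer() */

    /* legal from both RCU readers and RTNL-holding updaters; lockdep
     * complains if neither protection is held at the call site */
    static struct cfg *cfg_deref(void)
    {
            return rcu_dereference_check(active_cfg,
                                         rcu_read_lock_held() ||
                                         lockdep_rtnl_is_held());
    }
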
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe11f60ce41b..a0d847c7cba5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
| @@ -74,6 +74,7 @@ | |||
| 74 | #include <linux/netdevice.h> | 74 | #include <linux/netdevice.h> |
| 75 | #include <linux/string.h> | 75 | #include <linux/string.h> |
| 76 | #include <linux/netfilter_ipv4.h> | 76 | #include <linux/netfilter_ipv4.h> |
| 77 | #include <linux/slab.h> | ||
| 77 | #include <net/snmp.h> | 78 | #include <net/snmp.h> |
| 78 | #include <net/ip.h> | 79 | #include <net/ip.h> |
| 79 | #include <net/route.h> | 80 | #include <net/route.h> |
| @@ -114,7 +115,7 @@ struct icmp_bxm { | |||
| 114 | /* An array of errno for error messages from dest unreach. */ | 115 | /* An array of errno for error messages from dest unreach. */ |
| 115 | /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ | 116 | /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ |
| 116 | 117 | ||
| 117 | struct icmp_err icmp_err_convert[] = { | 118 | const struct icmp_err icmp_err_convert[] = { |
| 118 | { | 119 | { |
| 119 | .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ | 120 | .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ |
| 120 | .fatal = 0, | 121 | .fatal = 0, |
| @@ -180,6 +181,7 @@ struct icmp_err icmp_err_convert[] = { | |||
| 180 | .fatal = 1, | 181 | .fatal = 1, |
| 181 | }, | 182 | }, |
| 182 | }; | 183 | }; |
| 184 | EXPORT_SYMBOL(icmp_err_convert); | ||
| 183 | 185 | ||
| 184 | /* | 186 | /* |
| 185 | * ICMP control array. This specifies what to do with each ICMP. | 187 | * ICMP control array. This specifies what to do with each ICMP. |
| @@ -266,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout) | |||
| 266 | dst->rate_tokens = token; | 268 | dst->rate_tokens = token; |
| 267 | return rc; | 269 | return rc; |
| 268 | } | 270 | } |
| 271 | EXPORT_SYMBOL(xrlim_allow); | ||
| 269 | 272 | ||
| 270 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | 273 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
| 271 | int type, int code) | 274 | int type, int code) |
| 272 | { | 275 | { |
| 273 | struct dst_entry *dst = &rt->u.dst; | 276 | struct dst_entry *dst = &rt->dst; |
| 274 | int rc = 1; | 277 | int rc = 1; |
| 275 | 278 | ||
| 276 | if (type > NR_ICMP_TYPES) | 279 | if (type > NR_ICMP_TYPES) |
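
xrlim_allow(), whose tail is exported above, is a token bucket: elapsed jiffies add tokens up to a burst cap, and each permitted ICMP costs one timeout's worth. A userspace sketch of that scheme, reconstructed from the visible tail of the function and assuming a burst factor of 6 (the kernel's XRLIM_BURST_FACTOR):

    #include <stdio.h>

    #define BURST_FACTOR 6  /* assumed to mirror XRLIM_BURST_FACTOR */

    /* time credits tokens, capped at a burst; each permitted event
     * costs 'timeout' ticks of credit */
    static int xrlim_allow_demo(long *tokens, long *last, long now,
                                long timeout)
    {
            long token = *tokens + (now - *last);
            int ok = 0;

            if (token > BURST_FACTOR * timeout)
                    token = BURST_FACTOR * timeout;
            if (token >= timeout) {
                    token -= timeout;
                    ok = 1;
            }
            *last = now;
            *tokens = token;
            return ok;
    }

    int main(void)
    {
            long tokens = 0, last = 0, now;

            for (now = 0; now < 50; now += 5)
                    printf("t=%ld allow=%d\n", now,
                           xrlim_allow_demo(&tokens, &last, now, 10));
            return 0;
    }

With a timeout of 10 and a 5-tick arrival rate, at most one event per timeout interval gets through, which is the intended steady-state behavior.
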
| @@ -326,13 +329,14 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
| 326 | struct sock *sk; | 329 | struct sock *sk; |
| 327 | struct sk_buff *skb; | 330 | struct sk_buff *skb; |
| 328 | 331 | ||
| 329 | sk = icmp_sk(dev_net((*rt)->u.dst.dev)); | 332 | sk = icmp_sk(dev_net((*rt)->dst.dev)); |
| 330 | if (ip_append_data(sk, icmp_glue_bits, icmp_param, | 333 | if (ip_append_data(sk, icmp_glue_bits, icmp_param, |
| 331 | icmp_param->data_len+icmp_param->head_len, | 334 | icmp_param->data_len+icmp_param->head_len, |
| 332 | icmp_param->head_len, | 335 | icmp_param->head_len, |
| 333 | ipc, rt, MSG_DONTWAIT) < 0) | 336 | ipc, rt, MSG_DONTWAIT) < 0) { |
| 337 | ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS); | ||
| 334 | ip_flush_pending_frames(sk); | 338 | ip_flush_pending_frames(sk); |
| 335 | else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 339 | } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
| 336 | struct icmphdr *icmph = icmp_hdr(skb); | 340 | struct icmphdr *icmph = icmp_hdr(skb); |
| 337 | __wsum csum = 0; | 341 | __wsum csum = 0; |
| 338 | struct sk_buff *skb1; | 342 | struct sk_buff *skb1; |
| @@ -357,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 357 | { | 361 | { |
| 358 | struct ipcm_cookie ipc; | 362 | struct ipcm_cookie ipc; |
| 359 | struct rtable *rt = skb_rtable(skb); | 363 | struct rtable *rt = skb_rtable(skb); |
| 360 | struct net *net = dev_net(rt->u.dst.dev); | 364 | struct net *net = dev_net(rt->dst.dev); |
| 361 | struct sock *sk; | 365 | struct sock *sk; |
| 362 | struct inet_sock *inet; | 366 | struct inet_sock *inet; |
| 363 | __be32 daddr; | 367 | __be32 daddr; |
| @@ -425,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 425 | 429 | ||
| 426 | if (!rt) | 430 | if (!rt) |
| 427 | goto out; | 431 | goto out; |
| 428 | net = dev_net(rt->u.dst.dev); | 432 | net = dev_net(rt->dst.dev); |
| 429 | 433 | ||
| 430 | /* | 434 | /* |
| 431 | * Find the original header. It is expected to be valid, of course. | 435 | * Find the original header. It is expected to be valid, of course. |
| @@ -585,20 +589,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 585 | err = __ip_route_output_key(net, &rt2, &fl); | 589 | err = __ip_route_output_key(net, &rt2, &fl); |
| 586 | else { | 590 | else { |
| 587 | struct flowi fl2 = {}; | 591 | struct flowi fl2 = {}; |
| 588 | struct dst_entry *odst; | 592 | unsigned long orefdst; |
| 589 | 593 | ||
| 590 | fl2.fl4_dst = fl.fl4_src; | 594 | fl2.fl4_dst = fl.fl4_src; |
| 591 | if (ip_route_output_key(net, &rt2, &fl2)) | 595 | if (ip_route_output_key(net, &rt2, &fl2)) |
| 592 | goto relookup_failed; | 596 | goto relookup_failed; |
| 593 | 597 | ||
| 594 | /* Ugh! */ | 598 | /* Ugh! */ |
| 595 | odst = skb_dst(skb_in); | 599 | orefdst = skb_in->_skb_refdst; /* save old refdst */ |
| 596 | err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, | 600 | err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, |
| 597 | RT_TOS(tos), rt2->u.dst.dev); | 601 | RT_TOS(tos), rt2->dst.dev); |
| 598 | 602 | ||
| 599 | dst_release(&rt2->u.dst); | 603 | dst_release(&rt2->dst); |
| 600 | rt2 = skb_rtable(skb_in); | 604 | rt2 = skb_rtable(skb_in); |
| 601 | skb_dst_set(skb_in, odst); | 605 | skb_in->_skb_refdst = orefdst; /* restore old refdst */ |
| 602 | } | 606 | } |
| 603 | 607 | ||
| 604 | if (err) | 608 | if (err) |
| @@ -608,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 608 | XFRM_LOOKUP_ICMP); | 612 | XFRM_LOOKUP_ICMP); |
| 609 | switch (err) { | 613 | switch (err) { |
| 610 | case 0: | 614 | case 0: |
| 611 | dst_release(&rt->u.dst); | 615 | dst_release(&rt->dst); |
| 612 | rt = rt2; | 616 | rt = rt2; |
| 613 | break; | 617 | break; |
| 614 | case -EPERM: | 618 | case -EPERM: |
| @@ -627,7 +631,7 @@ route_done: | |||
| 627 | 631 | ||
| 628 | /* RFC says return as much as we can without exceeding 576 bytes. */ | 632 | /* RFC says return as much as we can without exceeding 576 bytes. */ |
| 629 | 633 | ||
| 630 | room = dst_mtu(&rt->u.dst); | 634 | room = dst_mtu(&rt->dst); |
| 631 | if (room > 576) | 635 | if (room > 576) |
| 632 | room = 576; | 636 | room = 576; |
| 633 | room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; | 637 | room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; |
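
Per RFC 1122 the error must not exceed 576 bytes, so room is the smaller of the path MTU and 576, less the new IP header and any recorded route options; whatever remains bounds how much of the offending datagram is quoted back. The arithmetic as a tiny sketch, with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
            int mtu = 1500;     /* dst_mtu(&rt->dst) */
            int optlen = 8;     /* icmp_param.replyopts.optlen, illustrative */

            int room = mtu > 576 ? 576 : mtu;   /* never exceed 576 */
            room -= 20 + optlen;    /* sizeof(struct iphdr) + options */

            printf("room left for the ICMP message: %d bytes\n", room);
            return 0;
    }
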
| @@ -645,6 +649,7 @@ out_unlock: | |||
| 645 | icmp_xmit_unlock(sk); | 649 | icmp_xmit_unlock(sk); |
| 646 | out:; | 650 | out:; |
| 647 | } | 651 | } |
| 652 | EXPORT_SYMBOL(icmp_send); | ||
| 648 | 653 | ||
| 649 | 654 | ||
| 650 | /* | 655 | /* |
| @@ -923,6 +928,7 @@ static void icmp_address(struct sk_buff *skb) | |||
| 923 | /* | 928 | /* |
| 924 | * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain | 929 | * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain |
| 925 | * loudly if an inconsistency is found. | 930 | * loudly if an inconsistency is found. |
| 931 | * called with rcu_read_lock() | ||
| 926 | */ | 932 | */ |
| 927 | 933 | ||
| 928 | static void icmp_address_reply(struct sk_buff *skb) | 934 | static void icmp_address_reply(struct sk_buff *skb) |
| @@ -933,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb) | |||
| 933 | struct in_ifaddr *ifa; | 939 | struct in_ifaddr *ifa; |
| 934 | 940 | ||
| 935 | if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) | 941 | if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) |
| 936 | goto out; | 942 | return; |
| 937 | 943 | ||
| 938 | in_dev = in_dev_get(dev); | 944 | in_dev = __in_dev_get_rcu(dev); |
| 939 | if (!in_dev) | 945 | if (!in_dev) |
| 940 | goto out; | 946 | return; |
| 941 | rcu_read_lock(); | 947 | |
| 942 | if (in_dev->ifa_list && | 948 | if (in_dev->ifa_list && |
| 943 | IN_DEV_LOG_MARTIANS(in_dev) && | 949 | IN_DEV_LOG_MARTIANS(in_dev) && |
| 944 | IN_DEV_FORWARD(in_dev)) { | 950 | IN_DEV_FORWARD(in_dev)) { |
| @@ -956,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb) | |||
| 956 | mp, dev->name, &rt->rt_src); | 962 | mp, dev->name, &rt->rt_src); |
| 957 | } | 963 | } |
| 958 | } | 964 | } |
| 959 | rcu_read_unlock(); | ||
| 960 | in_dev_put(in_dev); | ||
| 961 | out:; | ||
| 962 | } | 965 | } |
| 963 | 966 | ||
| 964 | static void icmp_discard(struct sk_buff *skb) | 967 | static void icmp_discard(struct sk_buff *skb) |
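
icmp_address_reply() now runs entirely inside the caller's rcu_read_lock() section, so it can use the non-refcounting __in_dev_get_rcu() and simply return on failure instead of pairing in_dev_get() with in_dev_put(). A kernel-style sketch of that access pattern (illustrative, not standalone):

    #include <linux/inetdevice.h>
    #include <linux/skbuff.h>

    /* pattern from the conversion: inside an rcu_read_lock() section,
     * take the lightweight accessor and just return on failure */
    static void handle_reply(struct sk_buff *skb)
    {
            struct in_device *in_dev = __in_dev_get_rcu(skb->dev);

            if (!in_dev)
                    return;
            /* ... use in_dev; RCU keeps it alive until we return ... */
    }
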
| @@ -972,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb) | |||
| 972 | { | 975 | { |
| 973 | struct icmphdr *icmph; | 976 | struct icmphdr *icmph; |
| 974 | struct rtable *rt = skb_rtable(skb); | 977 | struct rtable *rt = skb_rtable(skb); |
| 975 | struct net *net = dev_net(rt->u.dst.dev); | 978 | struct net *net = dev_net(rt->dst.dev); |
| 976 | 979 | ||
| 977 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 980 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
| 978 | struct sec_path *sp = skb_sec_path(skb); | 981 | struct sec_path *sp = skb_sec_path(skb); |
| @@ -1214,7 +1217,3 @@ int __init icmp_init(void) | |||
| 1214 | { | 1217 | { |
| 1215 | return register_pernet_subsys(&icmp_sk_ops); | 1218 | return register_pernet_subsys(&icmp_sk_ops); |
| 1216 | } | 1219 | } |
| 1217 | |||
| 1218 | EXPORT_SYMBOL(icmp_err_convert); | ||
| 1219 | EXPORT_SYMBOL(icmp_send); | ||
| 1220 | EXPORT_SYMBOL(xrlim_allow); | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c08402c933..2a4bb76f2132 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
| @@ -71,6 +71,7 @@ | |||
| 71 | */ | 71 | */ |
| 72 | 72 | ||
| 73 | #include <linux/module.h> | 73 | #include <linux/module.h> |
| 74 | #include <linux/slab.h> | ||
| 74 | #include <asm/uaccess.h> | 75 | #include <asm/uaccess.h> |
| 75 | #include <asm/system.h> | 76 | #include <asm/system.h> |
| 76 | #include <linux/types.h> | 77 | #include <linux/types.h> |
| @@ -311,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
| 311 | return NULL; | 312 | return NULL; |
| 312 | } | 313 | } |
| 313 | 314 | ||
| 314 | skb_dst_set(skb, &rt->u.dst); | 315 | skb_dst_set(skb, &rt->dst); |
| 315 | skb->dev = dev; | 316 | skb->dev = dev; |
| 316 | 317 | ||
| 317 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 318 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
| @@ -329,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
| 329 | pip->saddr = rt->rt_src; | 330 | pip->saddr = rt->rt_src; |
| 330 | pip->protocol = IPPROTO_IGMP; | 331 | pip->protocol = IPPROTO_IGMP; |
| 331 | pip->tot_len = 0; /* filled in later */ | 332 | pip->tot_len = 0; /* filled in later */ |
| 332 | ip_select_ident(pip, &rt->u.dst, NULL); | 333 | ip_select_ident(pip, &rt->dst, NULL); |
| 333 | ((u8*)&pip[1])[0] = IPOPT_RA; | 334 | ((u8*)&pip[1])[0] = IPOPT_RA; |
| 334 | ((u8*)&pip[1])[1] = 4; | 335 | ((u8*)&pip[1])[1] = 4; |
| 335 | ((u8*)&pip[1])[2] = 0; | 336 | ((u8*)&pip[1])[2] = 0; |
| @@ -659,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 659 | return -1; | 660 | return -1; |
| 660 | } | 661 | } |
| 661 | 662 | ||
| 662 | skb_dst_set(skb, &rt->u.dst); | 663 | skb_dst_set(skb, &rt->dst); |
| 663 | 664 | ||
| 664 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 665 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
| 665 | 666 | ||
| @@ -675,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 675 | iph->daddr = dst; | 676 | iph->daddr = dst; |
| 676 | iph->saddr = rt->rt_src; | 677 | iph->saddr = rt->rt_src; |
| 677 | iph->protocol = IPPROTO_IGMP; | 678 | iph->protocol = IPPROTO_IGMP; |
| 678 | ip_select_ident(iph, &rt->u.dst, NULL); | 679 | ip_select_ident(iph, &rt->dst, NULL); |
| 679 | ((u8*)&iph[1])[0] = IPOPT_RA; | 680 | ((u8*)&iph[1])[0] = IPOPT_RA; |
| 680 | ((u8*)&iph[1])[1] = 4; | 681 | ((u8*)&iph[1])[1] = 4; |
| 681 | ((u8*)&iph[1])[2] = 0; | 682 | ((u8*)&iph[1])[2] = 0; |
| @@ -855,6 +856,18 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
| 855 | igmpv3_clear_delrec(in_dev); | 856 | igmpv3_clear_delrec(in_dev); |
| 856 | } else if (len < 12) { | 857 | } else if (len < 12) { |
| 857 | return; /* ignore bogus packet; freed by caller */ | 858 | return; /* ignore bogus packet; freed by caller */ |
| 859 | } else if (IGMP_V1_SEEN(in_dev)) { | ||
| 860 | /* This is a v3 query with v1 queriers present */ | ||
| 861 | max_delay = IGMP_Query_Response_Interval; | ||
| 862 | group = 0; | ||
| 863 | } else if (IGMP_V2_SEEN(in_dev)) { | ||
| 864 | /* this is a v3 query with v2 queriers present; | ||
| 865 | * Interpretation of the max_delay code is problematic here. | ||
| 866 | * A real v2 host would use ih_code directly, while v3 has a | ||
| 867 | * different encoding. We use the v3 encoding as more likely | ||
| 868 | * to be intended in a v3 query. | ||
| 869 | */ | ||
| 870 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); | ||
| 858 | } else { /* v3 */ | 871 | } else { /* v3 */ |
| 859 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) | 872 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) |
| 860 | return; | 873 | return; |
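
The new v2-queriers branch derives max_delay with IGMPV3_MRC(), which decodes RFC 3376's Max Resp Code: values below 128 are literal tenths of a second, anything else is a 3-bit exponent / 4-bit mantissa floating-point value. A self-contained sketch of the decode (constants per RFC 3376, section 4.1.1):

    #include <stdio.h>
    #include <stdint.h>

    /* Max Resp Code < 128 is literal (tenths of a second); otherwise
     * 1eeemmmm decodes to (mant | 0x10) << (exp + 3) */
    static unsigned int igmpv3_mrc(uint8_t code)
    {
            if (code < 128)
                    return code;
            return ((code & 0x0f) | 0x10) << (((code >> 4) & 0x07) + 3);
    }

    int main(void)
    {
            printf("code 100  -> %u tenths\n", igmpv3_mrc(100));   /* 100 */
            printf("code 0x8f -> %u tenths\n", igmpv3_mrc(0x8f));  /* 248 */
            return 0;
    }
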
| @@ -915,18 +928,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
| 915 | read_unlock(&in_dev->mc_list_lock); | 928 | read_unlock(&in_dev->mc_list_lock); |
| 916 | } | 929 | } |
| 917 | 930 | ||
| 931 | /* called in rcu_read_lock() section */ | ||
| 918 | int igmp_rcv(struct sk_buff *skb) | 932 | int igmp_rcv(struct sk_buff *skb) |
| 919 | { | 933 | { |
| 920 | /* This basically follows the spec line by line -- see RFC1112 */ | 934 | /* This basically follows the spec line by line -- see RFC1112 */ |
| 921 | struct igmphdr *ih; | 935 | struct igmphdr *ih; |
| 922 | struct in_device *in_dev = in_dev_get(skb->dev); | 936 | struct in_device *in_dev = __in_dev_get_rcu(skb->dev); |
| 923 | int len = skb->len; | 937 | int len = skb->len; |
| 924 | 938 | ||
| 925 | if (in_dev == NULL) | 939 | if (in_dev == NULL) |
| 926 | goto drop; | 940 | goto drop; |
| 927 | 941 | ||
| 928 | if (!pskb_may_pull(skb, sizeof(struct igmphdr))) | 942 | if (!pskb_may_pull(skb, sizeof(struct igmphdr))) |
| 929 | goto drop_ref; | 943 | goto drop; |
| 930 | 944 | ||
| 931 | switch (skb->ip_summed) { | 945 | switch (skb->ip_summed) { |
| 932 | case CHECKSUM_COMPLETE: | 946 | case CHECKSUM_COMPLETE: |
| @@ -936,7 +950,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
| 936 | case CHECKSUM_NONE: | 950 | case CHECKSUM_NONE: |
| 937 | skb->csum = 0; | 951 | skb->csum = 0; |
| 938 | if (__skb_checksum_complete(skb)) | 952 | if (__skb_checksum_complete(skb)) |
| 939 | goto drop_ref; | 953 | goto drop; |
| 940 | } | 954 | } |
| 941 | 955 | ||
| 942 | ih = igmp_hdr(skb); | 956 | ih = igmp_hdr(skb); |
| @@ -946,7 +960,6 @@ int igmp_rcv(struct sk_buff *skb) | |||
| 946 | break; | 960 | break; |
| 947 | case IGMP_HOST_MEMBERSHIP_REPORT: | 961 | case IGMP_HOST_MEMBERSHIP_REPORT: |
| 948 | case IGMPV2_HOST_MEMBERSHIP_REPORT: | 962 | case IGMPV2_HOST_MEMBERSHIP_REPORT: |
| 949 | case IGMPV3_HOST_MEMBERSHIP_REPORT: | ||
| 950 | /* Is it our report looped back? */ | 963 | /* Is it our report looped back? */ |
| 951 | if (skb_rtable(skb)->fl.iif == 0) | 964 | if (skb_rtable(skb)->fl.iif == 0) |
| 952 | break; | 965 | break; |
| @@ -957,9 +970,9 @@ int igmp_rcv(struct sk_buff *skb) | |||
| 957 | break; | 970 | break; |
| 958 | case IGMP_PIM: | 971 | case IGMP_PIM: |
| 959 | #ifdef CONFIG_IP_PIMSM_V1 | 972 | #ifdef CONFIG_IP_PIMSM_V1 |
| 960 | in_dev_put(in_dev); | ||
| 961 | return pim_rcv_v1(skb); | 973 | return pim_rcv_v1(skb); |
| 962 | #endif | 974 | #endif |
| 975 | case IGMPV3_HOST_MEMBERSHIP_REPORT: | ||
| 963 | case IGMP_DVMRP: | 976 | case IGMP_DVMRP: |
| 964 | case IGMP_TRACE: | 977 | case IGMP_TRACE: |
| 965 | case IGMP_HOST_LEAVE_MESSAGE: | 978 | case IGMP_HOST_LEAVE_MESSAGE: |
| @@ -970,8 +983,6 @@ int igmp_rcv(struct sk_buff *skb) | |||
| 970 | break; | 983 | break; |
| 971 | } | 984 | } |
| 972 | 985 | ||
| 973 | drop_ref: | ||
| 974 | in_dev_put(in_dev); | ||
| 975 | drop: | 986 | drop: |
| 976 | kfree_skb(skb); | 987 | kfree_skb(skb); |
| 977 | return 0; | 988 | return 0; |
| @@ -997,7 +1008,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) | |||
| 997 | --ANK | 1008 | --ANK |
| 998 | */ | 1009 | */ |
| 999 | if (arp_mc_map(addr, buf, dev, 0) == 0) | 1010 | if (arp_mc_map(addr, buf, dev, 0) == 0) |
| 1000 | dev_mc_add(dev, buf, dev->addr_len, 0); | 1011 | dev_mc_add(dev, buf); |
| 1001 | } | 1012 | } |
| 1002 | 1013 | ||
| 1003 | /* | 1014 | /* |
| @@ -1010,7 +1021,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) | |||
| 1010 | struct net_device *dev = in_dev->dev; | 1021 | struct net_device *dev = in_dev->dev; |
| 1011 | 1022 | ||
| 1012 | if (arp_mc_map(addr, buf, dev, 0) == 0) | 1023 | if (arp_mc_map(addr, buf, dev, 0) == 0) |
| 1013 | dev_mc_delete(dev, buf, dev->addr_len, 0); | 1024 | dev_mc_del(dev, buf); |
| 1014 | } | 1025 | } |
| 1015 | 1026 | ||
| 1016 | #ifdef CONFIG_IP_MULTICAST | 1027 | #ifdef CONFIG_IP_MULTICAST |
| @@ -1245,6 +1256,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) | |||
| 1245 | out: | 1256 | out: |
| 1246 | return; | 1257 | return; |
| 1247 | } | 1258 | } |
| 1259 | EXPORT_SYMBOL(ip_mc_inc_group); | ||
| 1248 | 1260 | ||
| 1249 | /* | 1261 | /* |
| 1250 | * Resend IGMP JOIN report; used for bonding. | 1262 | * Resend IGMP JOIN report; used for bonding. |
| @@ -1267,6 +1279,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im) | |||
| 1267 | igmp_ifc_event(in_dev); | 1279 | igmp_ifc_event(in_dev); |
| 1268 | #endif | 1280 | #endif |
| 1269 | } | 1281 | } |
| 1282 | EXPORT_SYMBOL(ip_mc_rejoin_group); | ||
| 1270 | 1283 | ||
| 1271 | /* | 1284 | /* |
| 1272 | * A socket has left a multicast group on device dev | 1285 | * A socket has left a multicast group on device dev |
| @@ -1297,6 +1310,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) | |||
| 1297 | } | 1310 | } |
| 1298 | } | 1311 | } |
| 1299 | } | 1312 | } |
| 1313 | EXPORT_SYMBOL(ip_mc_dec_group); | ||
| 1300 | 1314 | ||
| 1301 | /* Device changing type */ | 1315 | /* Device changing type */ |
| 1302 | 1316 | ||
| @@ -1426,7 +1440,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) | |||
| 1426 | } | 1440 | } |
| 1427 | 1441 | ||
| 1428 | if (!dev && !ip_route_output_key(net, &rt, &fl)) { | 1442 | if (!dev && !ip_route_output_key(net, &rt, &fl)) { |
| 1429 | dev = rt->u.dst.dev; | 1443 | dev = rt->dst.dev; |
| 1430 | ip_rt_put(rt); | 1444 | ip_rt_put(rt); |
| 1431 | } | 1445 | } |
| 1432 | if (dev) { | 1446 | if (dev) { |
| @@ -1645,8 +1659,7 @@ static int sf_setstate(struct ip_mc_list *pmc) | |||
| 1645 | if (dpsf->sf_inaddr == psf->sf_inaddr) | 1659 | if (dpsf->sf_inaddr == psf->sf_inaddr) |
| 1646 | break; | 1660 | break; |
| 1647 | if (!dpsf) { | 1661 | if (!dpsf) { |
| 1648 | dpsf = (struct ip_sf_list *) | 1662 | dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); |
| 1649 | kmalloc(sizeof(*dpsf), GFP_ATOMIC); | ||
| 1650 | if (!dpsf) | 1663 | if (!dpsf) |
| 1651 | continue; | 1664 | continue; |
| 1652 | *dpsf = *psf; | 1665 | *dpsf = *psf; |
| @@ -1799,32 +1812,55 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) | |||
| 1799 | iml->next = inet->mc_list; | 1812 | iml->next = inet->mc_list; |
| 1800 | iml->sflist = NULL; | 1813 | iml->sflist = NULL; |
| 1801 | iml->sfmode = MCAST_EXCLUDE; | 1814 | iml->sfmode = MCAST_EXCLUDE; |
| 1802 | inet->mc_list = iml; | 1815 | rcu_assign_pointer(inet->mc_list, iml); |
| 1803 | ip_mc_inc_group(in_dev, addr); | 1816 | ip_mc_inc_group(in_dev, addr); |
| 1804 | err = 0; | 1817 | err = 0; |
| 1805 | done: | 1818 | done: |
| 1806 | rtnl_unlock(); | 1819 | rtnl_unlock(); |
| 1807 | return err; | 1820 | return err; |
| 1808 | } | 1821 | } |
| 1822 | EXPORT_SYMBOL(ip_mc_join_group); | ||
| 1823 | |||
| 1824 | static void ip_sf_socklist_reclaim(struct rcu_head *rp) | ||
| 1825 | { | ||
| 1826 | struct ip_sf_socklist *psf; | ||
| 1827 | |||
| 1828 | psf = container_of(rp, struct ip_sf_socklist, rcu); | ||
| 1829 | /* sk_omem_alloc should have been decreased by the caller*/ | ||
| 1830 | kfree(psf); | ||
| 1831 | } | ||
| 1809 | 1832 | ||
| 1810 | static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, | 1833 | static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, |
| 1811 | struct in_device *in_dev) | 1834 | struct in_device *in_dev) |
| 1812 | { | 1835 | { |
| 1836 | struct ip_sf_socklist *psf = iml->sflist; | ||
| 1813 | int err; | 1837 | int err; |
| 1814 | 1838 | ||
| 1815 | if (iml->sflist == NULL) { | 1839 | if (psf == NULL) { |
| 1816 | /* any-source empty exclude case */ | 1840 | /* any-source empty exclude case */ |
| 1817 | return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, | 1841 | return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, |
| 1818 | iml->sfmode, 0, NULL, 0); | 1842 | iml->sfmode, 0, NULL, 0); |
| 1819 | } | 1843 | } |
| 1820 | err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, | 1844 | err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, |
| 1821 | iml->sfmode, iml->sflist->sl_count, | 1845 | iml->sfmode, psf->sl_count, psf->sl_addr, 0); |
| 1822 | iml->sflist->sl_addr, 0); | 1846 | rcu_assign_pointer(iml->sflist, NULL); |
| 1823 | sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); | 1847 | /* decrease mem now to avoid the memleak warning */ |
| 1824 | iml->sflist = NULL; | 1848 | atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); |
| 1849 | call_rcu(&psf->rcu, ip_sf_socklist_reclaim); | ||
| 1825 | return err; | 1850 | return err; |
| 1826 | } | 1851 | } |
| 1827 | 1852 | ||
| 1853 | |||
| 1854 | static void ip_mc_socklist_reclaim(struct rcu_head *rp) | ||
| 1855 | { | ||
| 1856 | struct ip_mc_socklist *iml; | ||
| 1857 | |||
| 1858 | iml = container_of(rp, struct ip_mc_socklist, rcu); | ||
| 1859 | /* sk_omem_alloc should have been decreased by the caller*/ | ||
| 1860 | kfree(iml); | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | |||
| 1828 | /* | 1864 | /* |
| 1829 | * Ask a socket to leave a group. | 1865 | * Ask a socket to leave a group. |
| 1830 | */ | 1866 | */ |
| @@ -1854,12 +1890,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) | |||
| 1854 | 1890 | ||
| 1855 | (void) ip_mc_leave_src(sk, iml, in_dev); | 1891 | (void) ip_mc_leave_src(sk, iml, in_dev); |
| 1856 | 1892 | ||
| 1857 | *imlp = iml->next; | 1893 | rcu_assign_pointer(*imlp, iml->next); |
| 1858 | 1894 | ||
| 1859 | if (in_dev) | 1895 | if (in_dev) |
| 1860 | ip_mc_dec_group(in_dev, group); | 1896 | ip_mc_dec_group(in_dev, group); |
| 1861 | rtnl_unlock(); | 1897 | rtnl_unlock(); |
| 1862 | sock_kfree_s(sk, iml, sizeof(*iml)); | 1898 | /* decrease mem now to avoid the memleak warning */ |
| 1899 | atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); | ||
| 1900 | call_rcu(&iml->rcu, ip_mc_socklist_reclaim); | ||
| 1863 | return 0; | 1901 | return 0; |
| 1864 | } | 1902 | } |
| 1865 | if (!in_dev) | 1903 | if (!in_dev) |
| @@ -1974,9 +2012,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct | |||
| 1974 | if (psl) { | 2012 | if (psl) { |
| 1975 | for (i=0; i<psl->sl_count; i++) | 2013 | for (i=0; i<psl->sl_count; i++) |
| 1976 | newpsl->sl_addr[i] = psl->sl_addr[i]; | 2014 | newpsl->sl_addr[i] = psl->sl_addr[i]; |
| 1977 | sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); | 2015 | /* decrease mem now to avoid the memleak warning */ |
| 2016 | atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); | ||
| 2017 | call_rcu(&psl->rcu, ip_sf_socklist_reclaim); | ||
| 1978 | } | 2018 | } |
| 1979 | pmc->sflist = psl = newpsl; | 2019 | rcu_assign_pointer(pmc->sflist, newpsl); |
| 2020 | psl = newpsl; | ||
| 1980 | } | 2021 | } |
| 1981 | rv = 1; /* > 0 for insert logic below if sl_count is 0 */ | 2022 | rv = 1; /* > 0 for insert logic below if sl_count is 0 */ |
| 1982 | for (i=0; i<psl->sl_count; i++) { | 2023 | for (i=0; i<psl->sl_count; i++) { |
| @@ -2072,11 +2113,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) | |||
| 2072 | if (psl) { | 2113 | if (psl) { |
| 2073 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, | 2114 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, |
| 2074 | psl->sl_count, psl->sl_addr, 0); | 2115 | psl->sl_count, psl->sl_addr, 0); |
| 2075 | sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); | 2116 | /* decrease mem now to avoid the memleak warning */ |
| 2117 | atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); | ||
| 2118 | call_rcu(&psl->rcu, ip_sf_socklist_reclaim); | ||
| 2076 | } else | 2119 | } else |
| 2077 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, | 2120 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, |
| 2078 | 0, NULL, 0); | 2121 | 0, NULL, 0); |
| 2079 | pmc->sflist = newpsl; | 2122 | rcu_assign_pointer(pmc->sflist, newpsl); |
| 2080 | pmc->sfmode = msf->imsf_fmode; | 2123 | pmc->sfmode = msf->imsf_fmode; |
| 2081 | err = 0; | 2124 | err = 0; |
| 2082 | done: | 2125 | done: |
| @@ -2209,30 +2252,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) | |||
| 2209 | struct ip_mc_socklist *pmc; | 2252 | struct ip_mc_socklist *pmc; |
| 2210 | struct ip_sf_socklist *psl; | 2253 | struct ip_sf_socklist *psl; |
| 2211 | int i; | 2254 | int i; |
| 2255 | int ret; | ||
| 2212 | 2256 | ||
| 2257 | ret = 1; | ||
| 2213 | if (!ipv4_is_multicast(loc_addr)) | 2258 | if (!ipv4_is_multicast(loc_addr)) |
| 2214 | return 1; | 2259 | goto out; |
| 2215 | 2260 | ||
| 2216 | for (pmc=inet->mc_list; pmc; pmc=pmc->next) { | 2261 | rcu_read_lock(); |
| 2262 | for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { | ||
| 2217 | if (pmc->multi.imr_multiaddr.s_addr == loc_addr && | 2263 | if (pmc->multi.imr_multiaddr.s_addr == loc_addr && |
| 2218 | pmc->multi.imr_ifindex == dif) | 2264 | pmc->multi.imr_ifindex == dif) |
| 2219 | break; | 2265 | break; |
| 2220 | } | 2266 | } |
| 2267 | ret = inet->mc_all; | ||
| 2221 | if (!pmc) | 2268 | if (!pmc) |
| 2222 | return inet->mc_all; | 2269 | goto unlock; |
| 2223 | psl = pmc->sflist; | 2270 | psl = pmc->sflist; |
| 2271 | ret = (pmc->sfmode == MCAST_EXCLUDE); | ||
| 2224 | if (!psl) | 2272 | if (!psl) |
| 2225 | return pmc->sfmode == MCAST_EXCLUDE; | 2273 | goto unlock; |
| 2226 | 2274 | ||
| 2227 | for (i=0; i<psl->sl_count; i++) { | 2275 | for (i=0; i<psl->sl_count; i++) { |
| 2228 | if (psl->sl_addr[i] == rmt_addr) | 2276 | if (psl->sl_addr[i] == rmt_addr) |
| 2229 | break; | 2277 | break; |
| 2230 | } | 2278 | } |
| 2279 | ret = 0; | ||
| 2231 | if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) | 2280 | if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) |
| 2232 | return 0; | 2281 | goto unlock; |
| 2233 | if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) | 2282 | if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) |
| 2234 | return 0; | 2283 | goto unlock; |
| 2235 | return 1; | 2284 | ret = 1; |
| 2285 | unlock: | ||
| 2286 | rcu_read_unlock(); | ||
| 2287 | out: | ||
| 2288 | return ret; | ||
| 2236 | } | 2289 | } |
| 2237 | 2290 | ||
| 2238 | /* | 2291 | /* |
| @@ -2251,7 +2304,7 @@ void ip_mc_drop_socket(struct sock *sk) | |||
| 2251 | rtnl_lock(); | 2304 | rtnl_lock(); |
| 2252 | while ((iml = inet->mc_list) != NULL) { | 2305 | while ((iml = inet->mc_list) != NULL) { |
| 2253 | struct in_device *in_dev; | 2306 | struct in_device *in_dev; |
| 2254 | inet->mc_list = iml->next; | 2307 | rcu_assign_pointer(inet->mc_list, iml->next); |
| 2255 | 2308 | ||
| 2256 | in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); | 2309 | in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); |
| 2257 | (void) ip_mc_leave_src(sk, iml, in_dev); | 2310 | (void) ip_mc_leave_src(sk, iml, in_dev); |
| @@ -2259,7 +2312,9 @@ void ip_mc_drop_socket(struct sock *sk) | |||
| 2259 | ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); | 2312 | ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); |
| 2260 | in_dev_put(in_dev); | 2313 | in_dev_put(in_dev); |
| 2261 | } | 2314 | } |
| 2262 | sock_kfree_s(sk, iml, sizeof(*iml)); | 2315 | /* decrease mem now to avoid the memleak warning */ |
| 2316 | atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); | ||
| 2317 | call_rcu(&iml->rcu, ip_mc_socklist_reclaim); | ||
| 2263 | } | 2318 | } |
| 2264 | rtnl_unlock(); | 2319 | rtnl_unlock(); |
| 2265 | } | 2320 | } |
| @@ -2603,7 +2658,7 @@ static const struct file_operations igmp_mcf_seq_fops = { | |||
| 2603 | .release = seq_release_net, | 2658 | .release = seq_release_net, |
| 2604 | }; | 2659 | }; |
| 2605 | 2660 | ||
| 2606 | static int igmp_net_init(struct net *net) | 2661 | static int __net_init igmp_net_init(struct net *net) |
| 2607 | { | 2662 | { |
| 2608 | struct proc_dir_entry *pde; | 2663 | struct proc_dir_entry *pde; |
| 2609 | 2664 | ||
| @@ -2621,7 +2676,7 @@ out_igmp: | |||
| 2621 | return -ENOMEM; | 2676 | return -ENOMEM; |
| 2622 | } | 2677 | } |
| 2623 | 2678 | ||
| 2624 | static void igmp_net_exit(struct net *net) | 2679 | static void __net_exit igmp_net_exit(struct net *net) |
| 2625 | { | 2680 | { |
| 2626 | proc_net_remove(net, "mcfilter"); | 2681 | proc_net_remove(net, "mcfilter"); |
| 2627 | proc_net_remove(net, "igmp"); | 2682 | proc_net_remove(net, "igmp"); |
| @@ -2637,8 +2692,3 @@ int __init igmp_mc_proc_init(void) | |||
| 2637 | return register_pernet_subsys(&igmp_net_ops); | 2692 | return register_pernet_subsys(&igmp_net_ops); |
| 2638 | } | 2693 | } |
| 2639 | #endif | 2694 | #endif |
| 2640 | |||
| 2641 | EXPORT_SYMBOL(ip_mc_dec_group); | ||
| 2642 | EXPORT_SYMBOL(ip_mc_inc_group); | ||
| 2643 | EXPORT_SYMBOL(ip_mc_join_group); | ||
| 2644 | EXPORT_SYMBOL(ip_mc_rejoin_group); | ||
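
The igmp conversion keeps writers serialized on the RTNL but lets ip_mc_sf_allow() walk inet->mc_list locklessly: entries are unlinked with rcu_assign_pointer(), sk_omem_alloc is debited immediately so the socket can be torn down before the grace period ends without a leak warning, and the actual free happens in a call_rcu() callback. A kernel-style sketch of that free path (illustrative types, not standalone):

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct filter {
            /* ... payload ... */
            struct rcu_head rcu;
    };

    static void filter_reclaim(struct rcu_head *rp)
    {
            /* memory accounting was already dropped by the caller,
             * as the patch does with atomic_sub() on sk_omem_alloc */
            kfree(container_of(rp, struct filter, rcu));
    }

    static void filter_unlink(struct filter **slot, struct filter *f)
    {
            rcu_assign_pointer(*slot, NULL);    /* hide from new readers */
            call_rcu(&f->rcu, filter_reclaim);  /* free after grace period */
    }
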
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ee16475f8fc3..7174370b1195 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
| @@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = { | |||
| 37 | .range = { 32768, 61000 }, | 37 | .range = { 32768, 61000 }, |
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | unsigned long *sysctl_local_reserved_ports; | ||
| 41 | EXPORT_SYMBOL(sysctl_local_reserved_ports); | ||
| 42 | |||
| 40 | void inet_get_local_port_range(int *low, int *high) | 43 | void inet_get_local_port_range(int *low, int *high) |
| 41 | { | 44 | { |
| 42 | unsigned seq; | 45 | unsigned seq; |
| @@ -81,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
| 81 | } | 84 | } |
| 82 | return node != NULL; | 85 | return node != NULL; |
| 83 | } | 86 | } |
| 84 | |||
| 85 | EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); | 87 | EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); |
| 86 | 88 | ||
| 87 | /* Obtain a reference to a local port for the given sock, | 89 | /* Obtain a reference to a local port for the given sock, |
| @@ -108,6 +110,8 @@ again: | |||
| 108 | 110 | ||
| 109 | smallest_size = -1; | 111 | smallest_size = -1; |
| 110 | do { | 112 | do { |
| 113 | if (inet_is_reserved_local_port(rover)) | ||
| 114 | goto next_nolock; | ||
| 111 | head = &hashinfo->bhash[inet_bhashfn(net, rover, | 115 | head = &hashinfo->bhash[inet_bhashfn(net, rover, |
| 112 | hashinfo->bhash_size)]; | 116 | hashinfo->bhash_size)]; |
| 113 | spin_lock(&head->lock); | 117 | spin_lock(&head->lock); |
| @@ -130,6 +134,7 @@ again: | |||
| 130 | break; | 134 | break; |
| 131 | next: | 135 | next: |
| 132 | spin_unlock(&head->lock); | 136 | spin_unlock(&head->lock); |
| 137 | next_nolock: | ||
| 133 | if (++rover > high) | 138 | if (++rover > high) |
| 134 | rover = low; | 139 | rover = low; |
| 135 | } while (--remaining > 0); | 140 | } while (--remaining > 0); |
| @@ -206,7 +211,6 @@ fail: | |||
| 206 | local_bh_enable(); | 211 | local_bh_enable(); |
| 207 | return ret; | 212 | return ret; |
| 208 | } | 213 | } |
| 209 | |||
| 210 | EXPORT_SYMBOL_GPL(inet_csk_get_port); | 214 | EXPORT_SYMBOL_GPL(inet_csk_get_port); |
| 211 | 215 | ||
| 212 | /* | 216 | /* |
| @@ -234,7 +238,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) | |||
| 234 | * having to remove and re-insert us on the wait queue. | 238 | * having to remove and re-insert us on the wait queue. |
| 235 | */ | 239 | */ |
| 236 | for (;;) { | 240 | for (;;) { |
| 237 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | 241 | prepare_to_wait_exclusive(sk_sleep(sk), &wait, |
| 238 | TASK_INTERRUPTIBLE); | 242 | TASK_INTERRUPTIBLE); |
| 239 | release_sock(sk); | 243 | release_sock(sk); |
| 240 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) | 244 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) |
| @@ -253,7 +257,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) | |||
| 253 | if (!timeo) | 257 | if (!timeo) |
| 254 | break; | 258 | break; |
| 255 | } | 259 | } |
| 256 | finish_wait(sk->sk_sleep, &wait); | 260 | finish_wait(sk_sleep(sk), &wait); |
| 257 | return err; | 261 | return err; |
| 258 | } | 262 | } |
| 259 | 263 | ||
| @@ -299,7 +303,6 @@ out_err: | |||
| 299 | *err = error; | 303 | *err = error; |
| 300 | goto out; | 304 | goto out; |
| 301 | } | 305 | } |
| 302 | |||
| 303 | EXPORT_SYMBOL(inet_csk_accept); | 306 | EXPORT_SYMBOL(inet_csk_accept); |
| 304 | 307 | ||
| 305 | /* | 308 | /* |
| @@ -321,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk, | |||
| 321 | setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); | 324 | setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); |
| 322 | icsk->icsk_pending = icsk->icsk_ack.pending = 0; | 325 | icsk->icsk_pending = icsk->icsk_ack.pending = 0; |
| 323 | } | 326 | } |
| 324 | |||
| 325 | EXPORT_SYMBOL(inet_csk_init_xmit_timers); | 327 | EXPORT_SYMBOL(inet_csk_init_xmit_timers); |
| 326 | 328 | ||
| 327 | void inet_csk_clear_xmit_timers(struct sock *sk) | 329 | void inet_csk_clear_xmit_timers(struct sock *sk) |
| @@ -334,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk) | |||
| 334 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | 336 | sk_stop_timer(sk, &icsk->icsk_delack_timer); |
| 335 | sk_stop_timer(sk, &sk->sk_timer); | 337 | sk_stop_timer(sk, &sk->sk_timer); |
| 336 | } | 338 | } |
| 337 | |||
| 338 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); | 339 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); |
| 339 | 340 | ||
| 340 | void inet_csk_delete_keepalive_timer(struct sock *sk) | 341 | void inet_csk_delete_keepalive_timer(struct sock *sk) |
| 341 | { | 342 | { |
| 342 | sk_stop_timer(sk, &sk->sk_timer); | 343 | sk_stop_timer(sk, &sk->sk_timer); |
| 343 | } | 344 | } |
| 344 | |||
| 345 | EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); | 345 | EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); |
| 346 | 346 | ||
| 347 | void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) | 347 | void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) |
| 348 | { | 348 | { |
| 349 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); | 349 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); |
| 350 | } | 350 | } |
| 351 | |||
| 352 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | 351 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); |
| 353 | 352 | ||
| 354 | struct dst_entry *inet_csk_route_req(struct sock *sk, | 353 | struct dst_entry *inet_csk_route_req(struct sock *sk, |
| @@ -377,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
| 377 | goto no_route; | 376 | goto no_route; |
| 378 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 377 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
| 379 | goto route_err; | 378 | goto route_err; |
| 380 | return &rt->u.dst; | 379 | return &rt->dst; |
| 381 | 380 | ||
| 382 | route_err: | 381 | route_err: |
| 383 | ip_rt_put(rt); | 382 | ip_rt_put(rt); |
| @@ -385,7 +384,6 @@ no_route: | |||
| 385 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | 384 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
| 386 | return NULL; | 385 | return NULL; |
| 387 | } | 386 | } |
| 388 | |||
| 389 | EXPORT_SYMBOL_GPL(inet_csk_route_req); | 387 | EXPORT_SYMBOL_GPL(inet_csk_route_req); |
| 390 | 388 | ||
| 391 | static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, | 389 | static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, |
| @@ -427,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, | |||
| 427 | 425 | ||
| 428 | return req; | 426 | return req; |
| 429 | } | 427 | } |
| 430 | |||
| 431 | EXPORT_SYMBOL_GPL(inet_csk_search_req); | 428 | EXPORT_SYMBOL_GPL(inet_csk_search_req); |
| 432 | 429 | ||
| 433 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | 430 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, |
| @@ -441,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | |||
| 441 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); | 438 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); |
| 442 | inet_csk_reqsk_queue_added(sk, timeout); | 439 | inet_csk_reqsk_queue_added(sk, timeout); |
| 443 | } | 440 | } |
| 441 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); | ||
| 444 | 442 | ||
| 445 | /* Only thing we need from tcp.h */ | 443 | /* Only thing we need from tcp.h */ |
| 446 | extern int sysctl_tcp_synack_retries; | 444 | extern int sysctl_tcp_synack_retries; |
| 447 | 445 | ||
| 448 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); | ||
| 449 | 446 | ||
| 450 | /* Decide when to expire the request and when to resend SYN-ACK */ | 447 | /* Decide when to expire the request and when to resend SYN-ACK */ |
| 451 | static inline void syn_ack_recalc(struct request_sock *req, const int thresh, | 448 | static inline void syn_ack_recalc(struct request_sock *req, const int thresh, |
| @@ -529,6 +526,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
| 529 | syn_ack_recalc(req, thresh, max_retries, | 526 | syn_ack_recalc(req, thresh, max_retries, |
| 530 | queue->rskq_defer_accept, | 527 | queue->rskq_defer_accept, |
| 531 | &expire, &resend); | 528 | &expire, &resend); |
| 529 | if (req->rsk_ops->syn_ack_timeout) | ||
| 530 | req->rsk_ops->syn_ack_timeout(parent, req); | ||
| 532 | if (!expire && | 531 | if (!expire && |
| 533 | (!resend || | 532 | (!resend || |
| 534 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || | 533 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || |
| @@ -561,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
| 561 | if (lopt->qlen) | 560 | if (lopt->qlen) |
| 562 | inet_csk_reset_keepalive_timer(parent, interval); | 561 | inet_csk_reset_keepalive_timer(parent, interval); |
| 563 | } | 562 | } |
| 564 | |||
| 565 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); | 563 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); |
| 566 | 564 | ||
| 567 | struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | 565 | struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, |
| @@ -591,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | |||
| 591 | } | 589 | } |
| 592 | return newsk; | 590 | return newsk; |
| 593 | } | 591 | } |
| 594 | |||
| 595 | EXPORT_SYMBOL_GPL(inet_csk_clone); | 592 | EXPORT_SYMBOL_GPL(inet_csk_clone); |
| 596 | 593 | ||
| 597 | /* | 594 | /* |
| @@ -622,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk) | |||
| 622 | percpu_counter_dec(sk->sk_prot->orphan_count); | 619 | percpu_counter_dec(sk->sk_prot->orphan_count); |
| 623 | sock_put(sk); | 620 | sock_put(sk); |
| 624 | } | 621 | } |
| 625 | |||
| 626 | EXPORT_SYMBOL(inet_csk_destroy_sock); | 622 | EXPORT_SYMBOL(inet_csk_destroy_sock); |
| 627 | 623 | ||
| 628 | int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) | 624 | int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) |
| @@ -657,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) | |||
| 657 | __reqsk_queue_destroy(&icsk->icsk_accept_queue); | 653 | __reqsk_queue_destroy(&icsk->icsk_accept_queue); |
| 658 | return -EADDRINUSE; | 654 | return -EADDRINUSE; |
| 659 | } | 655 | } |
| 660 | |||
| 661 | EXPORT_SYMBOL_GPL(inet_csk_listen_start); | 656 | EXPORT_SYMBOL_GPL(inet_csk_listen_start); |
| 662 | 657 | ||
| 663 | /* | 658 | /* |
| @@ -712,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk) | |||
| 712 | } | 707 | } |
| 713 | WARN_ON(sk->sk_ack_backlog); | 708 | WARN_ON(sk->sk_ack_backlog); |
| 714 | } | 709 | } |
| 715 | |||
| 716 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); | 710 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); |
| 717 | 711 | ||
| 718 | void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) | 712 | void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) |
| @@ -724,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) | |||
| 724 | sin->sin_addr.s_addr = inet->inet_daddr; | 718 | sin->sin_addr.s_addr = inet->inet_daddr; |
| 725 | sin->sin_port = inet->inet_dport; | 719 | sin->sin_port = inet->inet_dport; |
| 726 | } | 720 | } |
| 727 | |||
| 728 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); | 721 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); |
| 729 | 722 | ||
| 730 | #ifdef CONFIG_COMPAT | 723 | #ifdef CONFIG_COMPAT |
| @@ -739,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, | |||
| 739 | return icsk->icsk_af_ops->getsockopt(sk, level, optname, | 732 | return icsk->icsk_af_ops->getsockopt(sk, level, optname, |
| 740 | optval, optlen); | 733 | optval, optlen); |
| 741 | } | 734 | } |
| 742 | |||
| 743 | EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); | 735 | EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); |
| 744 | 736 | ||
| 745 | int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, | 737 | int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, |
| @@ -753,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, | |||
| 753 | return icsk->icsk_af_ops->setsockopt(sk, level, optname, | 745 | return icsk->icsk_af_ops->setsockopt(sk, level, optname, |
| 754 | optval, optlen); | 746 | optval, optlen); |
| 755 | } | 747 | } |
| 756 | |||
| 757 | EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); | 748 | EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); |
| 758 | #endif | 749 | #endif |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1aaa8110d84b..e5fa2ddce320 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
| 15 | #include <linux/fcntl.h> | 15 | #include <linux/fcntl.h> |
| 16 | #include <linux/random.h> | 16 | #include <linux/random.h> |
| 17 | #include <linux/slab.h> | ||
| 17 | #include <linux/cache.h> | 18 | #include <linux/cache.h> |
| 18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
| 19 | #include <linux/time.h> | 20 | #include <linux/time.h> |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index eaf3e2c8646a..5ff2a51b6d0c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/random.h> | 19 | #include <linux/random.h> |
| 20 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> |
| 21 | #include <linux/rtnetlink.h> | 21 | #include <linux/rtnetlink.h> |
| 22 | #include <linux/slab.h> | ||
| 22 | 23 | ||
| 23 | #include <net/inet_frag.h> | 24 | #include <net/inet_frag.h> |
| 24 | 25 | ||
| @@ -113,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) | |||
| 113 | fq->last_in |= INET_FRAG_COMPLETE; | 114 | fq->last_in |= INET_FRAG_COMPLETE; |
| 114 | } | 115 | } |
| 115 | } | 116 | } |
| 116 | |||
| 117 | EXPORT_SYMBOL(inet_frag_kill); | 117 | EXPORT_SYMBOL(inet_frag_kill); |
| 118 | 118 | ||
| 119 | static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, | 119 | static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2b79377b468d..fb7ad5a21ff3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
| @@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk) | |||
| 99 | __inet_put_port(sk); | 99 | __inet_put_port(sk); |
| 100 | local_bh_enable(); | 100 | local_bh_enable(); |
| 101 | } | 101 | } |
| 102 | |||
| 103 | EXPORT_SYMBOL(inet_put_port); | 102 | EXPORT_SYMBOL(inet_put_port); |
| 104 | 103 | ||
| 105 | void __inet_inherit_port(struct sock *sk, struct sock *child) | 104 | void __inet_inherit_port(struct sock *sk, struct sock *child) |
| @@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) | |||
| 116 | inet_csk(child)->icsk_bind_hash = tb; | 115 | inet_csk(child)->icsk_bind_hash = tb; |
| 117 | spin_unlock(&head->lock); | 116 | spin_unlock(&head->lock); |
| 118 | } | 117 | } |
| 119 | |||
| 120 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | 118 | EXPORT_SYMBOL_GPL(__inet_inherit_port); |
| 121 | 119 | ||
| 122 | static inline int compute_score(struct sock *sk, struct net *net, | 120 | static inline int compute_score(struct sock *sk, struct net *net, |
| @@ -456,6 +454,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
| 456 | local_bh_disable(); | 454 | local_bh_disable(); |
| 457 | for (i = 1; i <= remaining; i++) { | 455 | for (i = 1; i <= remaining; i++) { |
| 458 | port = low + (i + offset) % remaining; | 456 | port = low + (i + offset) % remaining; |
| 457 | if (inet_is_reserved_local_port(port)) | ||
| 458 | continue; | ||
| 459 | head = &hinfo->bhash[inet_bhashfn(net, port, | 459 | head = &hinfo->bhash[inet_bhashfn(net, port, |
| 460 | hinfo->bhash_size)]; | 460 | hinfo->bhash_size)]; |
| 461 | spin_lock(&head->lock); | 461 | spin_lock(&head->lock); |
| @@ -544,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
| 544 | return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), | 544 | return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), |
| 545 | __inet_check_established, __inet_hash_nolisten); | 545 | __inet_check_established, __inet_hash_nolisten); |
| 546 | } | 546 | } |
| 547 | |||
| 548 | EXPORT_SYMBOL_GPL(inet_hash_connect); | 547 | EXPORT_SYMBOL_GPL(inet_hash_connect); |
| 549 | 548 | ||
| 550 | void inet_hashinfo_init(struct inet_hashinfo *h) | 549 | void inet_hashinfo_init(struct inet_hashinfo *h) |
| @@ -558,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h) | |||
| 558 | i + LISTENING_NULLS_BASE); | 557 | i + LISTENING_NULLS_BASE); |
| 559 | } | 558 | } |
| 560 | } | 559 | } |
| 561 | |||
| 562 | EXPORT_SYMBOL_GPL(inet_hashinfo_init); | 560 | EXPORT_SYMBOL_GPL(inet_hashinfo_init); |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index cc94cc2d8b2d..c5af909cf701 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
| 12 | #include <linux/kmemcheck.h> | 12 | #include <linux/kmemcheck.h> |
| 13 | #include <linux/slab.h> | ||
| 13 | #include <net/inet_hashtables.h> | 14 | #include <net/inet_hashtables.h> |
| 14 | #include <net/inet_timewait_sock.h> | 15 | #include <net/inet_timewait_sock.h> |
| 15 | #include <net/ip.h> | 16 | #include <net/ip.h> |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6bcfe52a9c87..9ffa24b9a804 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
| @@ -51,8 +51,8 @@ | |||
| 51 | * lookups performed with disabled BHs. | 51 | * lookups performed with disabled BHs. |
| 52 | * | 52 | * |
| 53 | * Serialisation issues. | 53 | * Serialisation issues. |
| 54 | * 1. Nodes may appear in the tree only with the pool write lock held. | 54 | * 1. Nodes may appear in the tree only with the pool lock held. |
| 55 | * 2. Nodes may disappear from the tree only with the pool write lock held | 55 | * 2. Nodes may disappear from the tree only with the pool lock held |
| 56 | * AND reference count being 0. | 56 | * AND reference count being 0. |
| 57 | * 3. Nodes appear and disappear from the unused node list only under | 57 | * 3. Nodes appear and disappear from the unused node list only under |
| 58 | * "inet_peer_unused_lock". | 58 | * "inet_peer_unused_lock". |
| @@ -64,23 +64,31 @@ | |||
| 64 | * usually under some other lock to prevent node disappearing | 64 | * usually under some other lock to prevent node disappearing |
| 65 | * dtime: unused node list lock | 65 | * dtime: unused node list lock |
| 66 | * v4daddr: unchangeable | 66 | * v4daddr: unchangeable |
| 67 | * ip_id_count: idlock | 67 | * ip_id_count: atomic value (no lock needed) |
| 68 | */ | 68 | */ |
| 69 | 69 | ||
| 70 | static struct kmem_cache *peer_cachep __read_mostly; | 70 | static struct kmem_cache *peer_cachep __read_mostly; |
| 71 | 71 | ||
| 72 | #define node_height(x) x->avl_height | 72 | #define node_height(x) x->avl_height |
| 73 | static struct inet_peer peer_fake_node = { | 73 | |
| 74 | .avl_left = &peer_fake_node, | 74 | #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) |
| 75 | .avl_right = &peer_fake_node, | 75 | static const struct inet_peer peer_fake_node = { |
| 76 | .avl_left = peer_avl_empty, | ||
| 77 | .avl_right = peer_avl_empty, | ||
| 76 | .avl_height = 0 | 78 | .avl_height = 0 |
| 77 | }; | 79 | }; |
| 78 | #define peer_avl_empty (&peer_fake_node) | 80 | |
| 79 | static struct inet_peer *peer_root = peer_avl_empty; | 81 | static struct { |
| 80 | static DEFINE_RWLOCK(peer_pool_lock); | 82 | struct inet_peer *root; |
| 83 | spinlock_t lock; | ||
| 84 | int total; | ||
| 85 | } peers = { | ||
| 86 | .root = peer_avl_empty, | ||
| 87 | .lock = __SPIN_LOCK_UNLOCKED(peers.lock), | ||
| 88 | .total = 0, | ||
| 89 | }; | ||
| 81 | #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ | 90 | #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ |
| 82 | 91 | ||
| 83 | static int peer_total; | ||
| 84 | /* Exported for sysctl_net_ipv4. */ | 92 | /* Exported for sysctl_net_ipv4. */ |
| 85 | int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more | 93 | int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more |
| 86 | * aggressively at this stage */ | 94 | * aggressively at this stage */ |
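Folding peer_root, the pool lock, and peer_total into a single peers struct (and likewise for the unused list just below) is arguably more than tidying: the three fields are always touched together, so one struct gives them a single static initializer and, plausibly, shared cache-line locality. The same idiom in plain C, with a pthread mutex standing in for the spinlock:

```c
#include <pthread.h>

struct peer;                                  /* opaque AVL node */

static struct {
	struct peer    *root;
	pthread_mutex_t lock;                 /* guards root and total */
	int             total;
} peers = {
	.root  = NULL,
	.lock  = PTHREAD_MUTEX_INITIALIZER,
	.total = 0,
};
```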
| @@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min | |||
| 89 | int inet_peer_gc_mintime __read_mostly = 10 * HZ; | 97 | int inet_peer_gc_mintime __read_mostly = 10 * HZ; |
| 90 | int inet_peer_gc_maxtime __read_mostly = 120 * HZ; | 98 | int inet_peer_gc_maxtime __read_mostly = 120 * HZ; |
| 91 | 99 | ||
| 92 | static LIST_HEAD(unused_peers); | 100 | static struct { |
| 93 | static DEFINE_SPINLOCK(inet_peer_unused_lock); | 101 | struct list_head list; |
| 102 | spinlock_t lock; | ||
| 103 | } unused_peers = { | ||
| 104 | .list = LIST_HEAD_INIT(unused_peers.list), | ||
| 105 | .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), | ||
| 106 | }; | ||
| 94 | 107 | ||
| 95 | static void peer_check_expire(unsigned long dummy); | 108 | static void peer_check_expire(unsigned long dummy); |
| 96 | static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); | 109 | static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); |
| @@ -116,7 +129,7 @@ void __init inet_initpeers(void) | |||
| 116 | 129 | ||
| 117 | peer_cachep = kmem_cache_create("inet_peer_cache", | 130 | peer_cachep = kmem_cache_create("inet_peer_cache", |
| 118 | sizeof(struct inet_peer), | 131 | sizeof(struct inet_peer), |
| 119 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, | 132 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, |
| 120 | NULL); | 133 | NULL); |
| 121 | 134 | ||
| 122 | /* All the timers, started at system startup tend | 135 | /* All the timers, started at system startup tend |
| @@ -131,38 +144,69 @@ void __init inet_initpeers(void) | |||
| 131 | /* Called with or without local BH being disabled. */ | 144 | /* Called with or without local BH being disabled. */ |
| 132 | static void unlink_from_unused(struct inet_peer *p) | 145 | static void unlink_from_unused(struct inet_peer *p) |
| 133 | { | 146 | { |
| 134 | spin_lock_bh(&inet_peer_unused_lock); | 147 | if (!list_empty(&p->unused)) { |
| 135 | list_del_init(&p->unused); | 148 | spin_lock_bh(&unused_peers.lock); |
| 136 | spin_unlock_bh(&inet_peer_unused_lock); | 149 | list_del_init(&p->unused); |
| 150 | spin_unlock_bh(&unused_peers.lock); | ||
| 151 | } | ||
| 137 | } | 152 | } |
| 138 | 153 | ||
| 139 | /* | 154 | /* |
| 140 | * Called with local BH disabled and the pool lock held. | 155 | * Called with local BH disabled and the pool lock held. |
| 141 | * _stack is known to be NULL or not at compile time, | ||
| 142 | * so compiler will optimize the if (_stack) tests. | ||
| 143 | */ | 156 | */ |
| 144 | #define lookup(_daddr, _stack) \ | 157 | #define lookup(_daddr, _stack) \ |
| 145 | ({ \ | 158 | ({ \ |
| 146 | struct inet_peer *u, **v; \ | 159 | struct inet_peer *u, **v; \ |
| 147 | if (_stack != NULL) { \ | 160 | \ |
| 148 | stackptr = _stack; \ | 161 | stackptr = _stack; \ |
| 149 | *stackptr++ = &peer_root; \ | 162 | *stackptr++ = &peers.root; \ |
| 150 | } \ | 163 | for (u = peers.root; u != peer_avl_empty; ) { \ |
| 151 | for (u = peer_root; u != peer_avl_empty; ) { \ | ||
| 152 | if (_daddr == u->v4daddr) \ | 164 | if (_daddr == u->v4daddr) \ |
| 153 | break; \ | 165 | break; \ |
| 154 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ | 166 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ |
| 155 | v = &u->avl_left; \ | 167 | v = &u->avl_left; \ |
| 156 | else \ | 168 | else \ |
| 157 | v = &u->avl_right; \ | 169 | v = &u->avl_right; \ |
| 158 | if (_stack != NULL) \ | 170 | *stackptr++ = v; \ |
| 159 | *stackptr++ = v; \ | ||
| 160 | u = *v; \ | 171 | u = *v; \ |
| 161 | } \ | 172 | } \ |
| 162 | u; \ | 173 | u; \ |
| 163 | }) | 174 | }) |
| 164 | 175 | ||
| 165 | /* Called with local BH disabled and the pool write lock held. */ | 176 | /* |
| 177 | * Called with rcu_read_lock_bh() | ||
| 178 | * Because we hold no lock against a writer, it's quite possible we fall | ||
| 179 | * into an endless loop. | ||
| 180 | * But every pointer we follow is guaranteed to be valid thanks to RCU. | ||
| 181 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH. | ||
| 182 | */ | ||
| 183 | static struct inet_peer *lookup_rcu_bh(__be32 daddr) | ||
| 184 | { | ||
| 185 | struct inet_peer *u = rcu_dereference_bh(peers.root); | ||
| 186 | int count = 0; | ||
| 187 | |||
| 188 | while (u != peer_avl_empty) { | ||
| 189 | if (daddr == u->v4daddr) { | ||
| 190 | /* Before taking a reference, check if this entry was | ||
| 191 | * deleted, unlink_from_pool() sets refcnt=-1 to make | ||
| 192 | * distinction between an unused entry (refcnt=0) and | ||
| 193 | * a freed one. | ||
| 194 | */ | ||
| 195 | if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1))) | ||
| 196 | u = NULL; | ||
| 197 | return u; | ||
| 198 | } | ||
| 199 | if ((__force __u32)daddr < (__force __u32)u->v4daddr) | ||
| 200 | u = rcu_dereference_bh(u->avl_left); | ||
| 201 | else | ||
| 202 | u = rcu_dereference_bh(u->avl_right); | ||
| 203 | if (unlikely(++count == PEER_MAXDEPTH)) | ||
| 204 | break; | ||
| 205 | } | ||
| 206 | return NULL; | ||
| 207 | } | ||
| 208 | |||
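lookup_rcu_bh() tolerates a torn view of the tree because RCU guarantees every pointer it loads still addresses valid memory; it only needs to bound the walk (PEER_MAXDEPTH) and refuse nodes already marked dead. The guard is atomic_add_unless(&u->refcnt, 1, -1): take a reference unless the sentinel -1 says the node is being freed. A sketch of that primitive with C11 atomics:

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Add `add` to *cnt unless it currently holds `unless` (-1 here marks
 * a deleted node).  Returns true iff the reference was taken. */
static bool add_unless(atomic_int *cnt, int add, int unless)
{
	int v = atomic_load(cnt);

	while (v != unless)
		if (atomic_compare_exchange_weak(cnt, &v, v + add))
			return true;     /* reference safely taken */
	return false;                    /* node already being unlinked */
}
```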
| 209 | /* Called with local BH disabled and the pool lock held. */ | ||
| 166 | #define lookup_rightempty(start) \ | 210 | #define lookup_rightempty(start) \ |
| 167 | ({ \ | 211 | ({ \ |
| 168 | struct inet_peer *u, **v; \ | 212 | struct inet_peer *u, **v; \ |
| @@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p) | |||
| 176 | u; \ | 220 | u; \ |
| 177 | }) | 221 | }) |
| 178 | 222 | ||
| 179 | /* Called with local BH disabled and the pool write lock held. | 223 | /* Called with local BH disabled and the pool lock held. |
| 180 | * Variable names are the proof of operation correctness. | 224 | * Variable names are the proof of operation correctness. |
| 181 | * Look into mm/map_avl.c for a more detailed description of the ideas. */ | 225 | * Look into mm/map_avl.c for a more detailed description of the ideas. |
| 226 | */ | ||
| 182 | static void peer_avl_rebalance(struct inet_peer **stack[], | 227 | static void peer_avl_rebalance(struct inet_peer **stack[], |
| 183 | struct inet_peer ***stackend) | 228 | struct inet_peer ***stackend) |
| 184 | { | 229 | { |
| @@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[], | |||
| 254 | } | 299 | } |
| 255 | } | 300 | } |
| 256 | 301 | ||
| 257 | /* Called with local BH disabled and the pool write lock held. */ | 302 | /* Called with local BH disabled and the pool lock held. */ |
| 258 | #define link_to_pool(n) \ | 303 | #define link_to_pool(n) \ |
| 259 | do { \ | 304 | do { \ |
| 260 | n->avl_height = 1; \ | 305 | n->avl_height = 1; \ |
| 261 | n->avl_left = peer_avl_empty; \ | 306 | n->avl_left = peer_avl_empty; \ |
| 262 | n->avl_right = peer_avl_empty; \ | 307 | n->avl_right = peer_avl_empty; \ |
| 308 | smp_wmb(); /* lockless readers can catch us now */ \ | ||
| 263 | **--stackptr = n; \ | 309 | **--stackptr = n; \ |
| 264 | peer_avl_rebalance(stack, stackptr); \ | 310 | peer_avl_rebalance(stack, stackptr); \ |
| 265 | } while(0) | 311 | } while (0) |
| 312 | |||
| 313 | static void inetpeer_free_rcu(struct rcu_head *head) | ||
| 314 | { | ||
| 315 | kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); | ||
| 316 | } | ||
| 266 | 317 | ||
| 267 | /* May be called with local BH enabled. */ | 318 | /* May be called with local BH enabled. */ |
| 268 | static void unlink_from_pool(struct inet_peer *p) | 319 | static void unlink_from_pool(struct inet_peer *p) |
| @@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p) | |||
| 271 | 322 | ||
| 272 | do_free = 0; | 323 | do_free = 0; |
| 273 | 324 | ||
| 274 | write_lock_bh(&peer_pool_lock); | 325 | spin_lock_bh(&peers.lock); |
| 275 | /* Check the reference counter. It was artificially incremented by 1 | 326 | /* Check the reference counter. It was artificially incremented by 1 |
| 276 | * in the cleanup() function to prevent it from suddenly disappearing. If the | 327 | * in the cleanup() function to prevent it from suddenly disappearing. If we can |
| 277 | * reference count is still 1 then the node is referenced only as `p' | 328 | * atomically (because of lockless readers) take this last reference, |
| 278 | * here and from the pool. So under the exclusive pool lock it's safe | 329 | * it's safe to remove the node and free it later. |
| 279 | * to remove the node and free it later. */ | 330 | * We use refcnt=-1 to alert lockless readers this entry is deleted. |
| 280 | if (atomic_read(&p->refcnt) == 1) { | 331 | */ |
| 332 | if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { | ||
| 281 | struct inet_peer **stack[PEER_MAXDEPTH]; | 333 | struct inet_peer **stack[PEER_MAXDEPTH]; |
| 282 | struct inet_peer ***stackptr, ***delp; | 334 | struct inet_peer ***stackptr, ***delp; |
| 283 | if (lookup(p->v4daddr, stack) != p) | 335 | if (lookup(p->v4daddr, stack) != p) |
| @@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p) | |||
| 303 | delp[1] = &t->avl_left; /* was &p->avl_left */ | 355 | delp[1] = &t->avl_left; /* was &p->avl_left */ |
| 304 | } | 356 | } |
| 305 | peer_avl_rebalance(stack, stackptr); | 357 | peer_avl_rebalance(stack, stackptr); |
| 306 | peer_total--; | 358 | peers.total--; |
| 307 | do_free = 1; | 359 | do_free = 1; |
| 308 | } | 360 | } |
| 309 | write_unlock_bh(&peer_pool_lock); | 361 | spin_unlock_bh(&peers.lock); |
| 310 | 362 | ||
| 311 | if (do_free) | 363 | if (do_free) |
| 312 | kmem_cache_free(peer_cachep, p); | 364 | call_rcu_bh(&p->rcu, inetpeer_free_rcu); |
| 313 | else | 365 | else |
| 314 | /* The node is used again. Decrease the reference counter | 366 | /* The node is used again. Decrease the reference counter |
| 315 | * back. The loop "cleanup -> unlink_from_unused | 367 | * back. The loop "cleanup -> unlink_from_unused |
| 316 | * -> unlink_from_pool -> putpeer -> link_to_unused | 368 | * -> unlink_from_pool -> putpeer -> link_to_unused |
| 317 | * -> cleanup (for the same node)" | 369 | * -> cleanup (for the same node)" |
| 318 | * doesn't really exist because the entry will have a | 370 | * doesn't really exist because the entry will have a |
| 319 | * recent deletion time and will not be cleaned again soon. */ | 371 | * recent deletion time and will not be cleaned again soon. |
| 372 | */ | ||
| 320 | inet_putpeer(p); | 373 | inet_putpeer(p); |
| 321 | } | 374 | } |
| 322 | 375 | ||
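The deleter is the mirror image of the reader above: atomic_cmpxchg(&p->refcnt, 1, -1) succeeds only when the pool holds the last reference, and the -1 it installs is exactly the sentinel the lockless readers test for; the memory is then reclaimed via call_rcu_bh() once readers drain. A condensed sketch (free_later is a hypothetical stand-in for the RCU callback):

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Try to retire a node whose only remaining reference is the pool's.
 * On success the count reads -1, so concurrent lookups back off. */
static bool try_retire(atomic_int *refcnt, void (*free_later)(void *), void *node)
{
	int expected = 1;

	if (atomic_compare_exchange_strong(refcnt, &expected, -1)) {
		free_later(node);  /* i.e. call_rcu_bh(): free after readers drain */
		return true;
	}
	return false;              /* someone still holds a reference; keep it linked */
}
```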
| @@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl) | |||
| 326 | struct inet_peer *p = NULL; | 379 | struct inet_peer *p = NULL; |
| 327 | 380 | ||
| 328 | /* Remove the first entry from the list of unused nodes. */ | 381 | /* Remove the first entry from the list of unused nodes. */ |
| 329 | spin_lock_bh(&inet_peer_unused_lock); | 382 | spin_lock_bh(&unused_peers.lock); |
| 330 | if (!list_empty(&unused_peers)) { | 383 | if (!list_empty(&unused_peers.list)) { |
| 331 | __u32 delta; | 384 | __u32 delta; |
| 332 | 385 | ||
| 333 | p = list_first_entry(&unused_peers, struct inet_peer, unused); | 386 | p = list_first_entry(&unused_peers.list, struct inet_peer, unused); |
| 334 | delta = (__u32)jiffies - p->dtime; | 387 | delta = (__u32)jiffies - p->dtime; |
| 335 | 388 | ||
| 336 | if (delta < ttl) { | 389 | if (delta < ttl) { |
| 337 | /* Do not prune fresh entries. */ | 390 | /* Do not prune fresh entries. */ |
| 338 | spin_unlock_bh(&inet_peer_unused_lock); | 391 | spin_unlock_bh(&unused_peers.lock); |
| 339 | return -1; | 392 | return -1; |
| 340 | } | 393 | } |
| 341 | 394 | ||
| @@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl) | |||
| 345 | * before unlink_from_pool() call. */ | 398 | * before unlink_from_pool() call. */ |
| 346 | atomic_inc(&p->refcnt); | 399 | atomic_inc(&p->refcnt); |
| 347 | } | 400 | } |
| 348 | spin_unlock_bh(&inet_peer_unused_lock); | 401 | spin_unlock_bh(&unused_peers.lock); |
| 349 | 402 | ||
| 350 | if (p == NULL) | 403 | if (p == NULL) |
| 351 | /* It means that the total number of USED entries has | 404 | /* It means that the total number of USED entries has |
| @@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl) | |||
| 360 | /* Called with or without local BH being disabled. */ | 413 | /* Called with or without local BH being disabled. */ |
| 361 | struct inet_peer *inet_getpeer(__be32 daddr, int create) | 414 | struct inet_peer *inet_getpeer(__be32 daddr, int create) |
| 362 | { | 415 | { |
| 363 | struct inet_peer *p, *n; | 416 | struct inet_peer *p; |
| 364 | struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; | 417 | struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; |
| 365 | 418 | ||
| 366 | /* Look up for the address quickly. */ | 419 | /* Look up the address quickly, locklessly. |
| 367 | read_lock_bh(&peer_pool_lock); | 420 | * Because of a concurrent writer, we might not find an existing entry. |
| 368 | p = lookup(daddr, NULL); | 421 | */ |
| 369 | if (p != peer_avl_empty) | 422 | rcu_read_lock_bh(); |
| 370 | atomic_inc(&p->refcnt); | 423 | p = lookup_rcu_bh(daddr); |
| 371 | read_unlock_bh(&peer_pool_lock); | 424 | rcu_read_unlock_bh(); |
| 425 | |||
| 426 | if (p) { | ||
| 427 | /* The existing node has been found. | ||
| 428 | * Remove the entry from unused list if it was there. | ||
| 429 | */ | ||
| 430 | unlink_from_unused(p); | ||
| 431 | return p; | ||
| 432 | } | ||
| 372 | 433 | ||
| 434 | /* Retry an exact lookup, this time taking the lock first. | ||
| 435 | * At least the nodes should now be hot in our cache. | ||
| 436 | */ | ||
| 437 | spin_lock_bh(&peers.lock); | ||
| 438 | p = lookup(daddr, stack); | ||
| 373 | if (p != peer_avl_empty) { | 439 | if (p != peer_avl_empty) { |
| 374 | /* The existing node has been found. */ | 440 | atomic_inc(&p->refcnt); |
| 441 | spin_unlock_bh(&peers.lock); | ||
| 375 | /* Remove the entry from unused list if it was there. */ | 442 | /* Remove the entry from unused list if it was there. */ |
| 376 | unlink_from_unused(p); | 443 | unlink_from_unused(p); |
| 377 | return p; | 444 | return p; |
| 378 | } | 445 | } |
| 446 | p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; | ||
| 447 | if (p) { | ||
| 448 | p->v4daddr = daddr; | ||
| 449 | atomic_set(&p->refcnt, 1); | ||
| 450 | atomic_set(&p->rid, 0); | ||
| 451 | atomic_set(&p->ip_id_count, secure_ip_id(daddr)); | ||
| 452 | p->tcp_ts_stamp = 0; | ||
| 453 | INIT_LIST_HEAD(&p->unused); | ||
| 454 | |||
| 455 | |||
| 456 | /* Link the node. */ | ||
| 457 | link_to_pool(p); | ||
| 458 | peers.total++; | ||
| 459 | } | ||
| 460 | spin_unlock_bh(&peers.lock); | ||
| 379 | 461 | ||
| 380 | if (!create) | 462 | if (peers.total >= inet_peer_threshold) |
| 381 | return NULL; | ||
| 382 | |||
| 383 | /* Allocate the space outside the locked region. */ | ||
| 384 | n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); | ||
| 385 | if (n == NULL) | ||
| 386 | return NULL; | ||
| 387 | n->v4daddr = daddr; | ||
| 388 | atomic_set(&n->refcnt, 1); | ||
| 389 | atomic_set(&n->rid, 0); | ||
| 390 | atomic_set(&n->ip_id_count, secure_ip_id(daddr)); | ||
| 391 | n->tcp_ts_stamp = 0; | ||
| 392 | |||
| 393 | write_lock_bh(&peer_pool_lock); | ||
| 394 | /* Check if an entry has suddenly appeared. */ | ||
| 395 | p = lookup(daddr, stack); | ||
| 396 | if (p != peer_avl_empty) | ||
| 397 | goto out_free; | ||
| 398 | |||
| 399 | /* Link the node. */ | ||
| 400 | link_to_pool(n); | ||
| 401 | INIT_LIST_HEAD(&n->unused); | ||
| 402 | peer_total++; | ||
| 403 | write_unlock_bh(&peer_pool_lock); | ||
| 404 | |||
| 405 | if (peer_total >= inet_peer_threshold) | ||
| 406 | /* Remove one less-recently-used entry. */ | 463 | /* Remove one less-recently-used entry. */ |
| 407 | cleanup_once(0); | 464 | cleanup_once(0); |
| 408 | 465 | ||
| 409 | return n; | ||
| 410 | |||
| 411 | out_free: | ||
| 412 | /* The appropriate node is already in the pool. */ | ||
| 413 | atomic_inc(&p->refcnt); | ||
| 414 | write_unlock_bh(&peer_pool_lock); | ||
| 415 | /* Remove the entry from unused list if it was there. */ | ||
| 416 | unlink_from_unused(p); | ||
| 417 | /* Free preallocated the preallocated node. */ | ||
| 418 | kmem_cache_free(peer_cachep, n); | ||
| 419 | return p; | 466 | return p; |
| 420 | } | 467 | } |
| 421 | 468 | ||
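inet_getpeer() now follows the standard optimistic pattern: a lockless lookup first, and on a miss, take the lock, repeat the exact lookup — another CPU may have inserted the entry in the window — and only then allocate and insert. A compact sketch of the control flow, with lockless_lookup()/locked_lookup()/insert() as assumed helpers:

```c
#include <pthread.h>
#include <stdlib.h>

struct peer { unsigned int daddr; /* ... */ };

extern struct peer *lockless_lookup(unsigned int daddr);  /* may spuriously miss */
extern struct peer *locked_lookup(unsigned int daddr);    /* exact, lock held */
extern void insert(struct peer *p);
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static struct peer *get_peer(unsigned int daddr, int create)
{
	struct peer *p = lockless_lookup(daddr);   /* fast path, no lock */
	if (p)
		return p;

	pthread_mutex_lock(&pool_lock);
	p = locked_lookup(daddr);                  /* re-check under the lock */
	if (!p && create) {
		p = calloc(1, sizeof(*p));
		if (p) {
			p->daddr = daddr;
			insert(p);                 /* publish while still locked */
		}
	}
	pthread_mutex_unlock(&pool_lock);
	return p;
}
```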
| @@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy) | |||
| 425 | unsigned long now = jiffies; | 472 | unsigned long now = jiffies; |
| 426 | int ttl; | 473 | int ttl; |
| 427 | 474 | ||
| 428 | if (peer_total >= inet_peer_threshold) | 475 | if (peers.total >= inet_peer_threshold) |
| 429 | ttl = inet_peer_minttl; | 476 | ttl = inet_peer_minttl; |
| 430 | else | 477 | else |
| 431 | ttl = inet_peer_maxttl | 478 | ttl = inet_peer_maxttl |
| 432 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * | 479 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * |
| 433 | peer_total / inet_peer_threshold * HZ; | 480 | peers.total / inet_peer_threshold * HZ; |
| 434 | while (!cleanup_once(ttl)) { | 481 | while (!cleanup_once(ttl)) { |
| 435 | if (jiffies != now) | 482 | if (jiffies != now) |
| 436 | break; | 483 | break; |
| @@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy) | |||
| 439 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime | 486 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime |
| 440 | * interval depending on the total number of entries (more entries, | 487 | * interval depending on the total number of entries (more entries, |
| 441 | * less interval). */ | 488 | * less interval). */ |
| 442 | if (peer_total >= inet_peer_threshold) | 489 | if (peers.total >= inet_peer_threshold) |
| 443 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; | 490 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; |
| 444 | else | 491 | else |
| 445 | peer_periodic_timer.expires = jiffies | 492 | peer_periodic_timer.expires = jiffies |
| 446 | + inet_peer_gc_maxtime | 493 | + inet_peer_gc_maxtime |
| 447 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * | 494 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * |
| 448 | peer_total / inet_peer_threshold * HZ; | 495 | peers.total / inet_peer_threshold * HZ; |
| 449 | add_timer(&peer_periodic_timer); | 496 | add_timer(&peer_periodic_timer); |
| 450 | } | 497 | } |
| 451 | 498 | ||
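The ttl expression above is a linear interpolation kept in integer arithmetic: entries live inet_peer_maxttl when the pool is empty and shrink toward inet_peer_minttl as peers.total approaches the threshold. The operator order matters — * and / associate left to right, so the partial result is scaled by total before the divide by the threshold, which would otherwise truncate to zero. A worked check (HZ assumed to be 100 for the example):

```c
#include <stdio.h>

#define HZ 100                       /* assumed tick rate for the example */

static long interp_ttl(long minttl, long maxttl, long total, long threshold)
{
	/* Same expression as above; (maxttl-minttl)/HZ is multiplied by
	 * total *before* dividing by threshold, so the quotient is not
	 * truncated to zero for total < threshold. */
	return maxttl - (maxttl - minttl) / HZ * total / threshold * HZ;
}

int main(void)
{
	long minttl = 120 * HZ, maxttl = 600 * HZ, threshold = 65536 + 128;

	printf("%ld\n", interp_ttl(minttl, maxttl, threshold / 2, threshold) / HZ);
	/* prints 360: a half-full pool lands midway between 120 s and 600 s */
	return 0;
}
```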
| 452 | void inet_putpeer(struct inet_peer *p) | 499 | void inet_putpeer(struct inet_peer *p) |
| 453 | { | 500 | { |
| 454 | spin_lock_bh(&inet_peer_unused_lock); | 501 | local_bh_disable(); |
| 455 | if (atomic_dec_and_test(&p->refcnt)) { | 502 | |
| 456 | list_add_tail(&p->unused, &unused_peers); | 503 | if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { |
| 504 | list_add_tail(&p->unused, &unused_peers.list); | ||
| 457 | p->dtime = (__u32)jiffies; | 505 | p->dtime = (__u32)jiffies; |
| 506 | spin_unlock(&unused_peers.lock); | ||
| 458 | } | 507 | } |
| 459 | spin_unlock_bh(&inet_peer_unused_lock); | 508 | |
| 509 | local_bh_enable(); | ||
| 460 | } | 510 | } |
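inet_putpeer() now pays for the unused-list lock only on the final put: atomic_dec_and_lock() decrements locklessly while other references remain, and takes the spinlock — returning with it held — only when the count is about to reach zero. A userspace approximation with C11 atomics and a mutex:

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Decrement *cnt; return true (with *lk held) iff it reached zero. */
static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lk)
{
	int v = atomic_load(cnt);

	while (v > 1)                    /* fast path: not the last reference */
		if (atomic_compare_exchange_weak(cnt, &v, v - 1))
			return false;

	pthread_mutex_lock(lk);          /* might be the last reference */
	if (atomic_fetch_sub(cnt, 1) == 1)
		return true;             /* caller does its list work, then unlocks */
	pthread_mutex_unlock(lk);
	return false;
}
```

On a true return the caller queues the node on the unused list, stamps dtime, and unlocks — exactly the shape of the rewritten function.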
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index a2991bc8e32e..99461f09320f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/ip.h> | 25 | #include <linux/ip.h> |
| 26 | #include <linux/icmp.h> | 26 | #include <linux/icmp.h> |
| 27 | #include <linux/netdevice.h> | 27 | #include <linux/netdevice.h> |
| 28 | #include <linux/slab.h> | ||
| 28 | #include <net/sock.h> | 29 | #include <net/sock.h> |
| 29 | #include <net/ip.h> | 30 | #include <net/ip.h> |
| 30 | #include <net/tcp.h> | 31 | #include <net/tcp.h> |
| @@ -86,16 +87,16 @@ int ip_forward(struct sk_buff *skb) | |||
| 86 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 87 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
| 87 | goto sr_failed; | 88 | goto sr_failed; |
| 88 | 89 | ||
| 89 | if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && | 90 | if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && |
| 90 | (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { | 91 | (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { |
| 91 | IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); | 92 | IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); |
| 92 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 93 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
| 93 | htonl(dst_mtu(&rt->u.dst))); | 94 | htonl(dst_mtu(&rt->dst))); |
| 94 | goto drop; | 95 | goto drop; |
| 95 | } | 96 | } |
| 96 | 97 | ||
| 97 | /* We are about to mangle packet. Copy it! */ | 98 | /* We are about to mangle packet. Copy it! */ |
| 98 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) | 99 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) |
| 99 | goto drop; | 100 | goto drop; |
| 100 | iph = ip_hdr(skb); | 101 | iph = ip_hdr(skb); |
| 101 | 102 | ||
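Apart from the rt->u.dst → rt->dst rename, the hunk above shows the forwarding-path PMTU rule this file implements: a packet longer than the egress MTU with DF set (and neither GSO nor local_df) is dropped and answered with ICMP "fragmentation needed" carrying the next-hop MTU. The decision, condensed (struct and helper are illustrative, not kernel API):

```c
#include <stdbool.h>
#include <stdint.h>

struct fwd_pkt {
	uint32_t len;
	bool     df;        /* IP_DF set in the header */
	bool     gso;       /* oversized packet the stack will segment itself */
	bool     local_df;  /* socket asked us to ignore DF */
};

/* Returns the MTU to advertise in ICMP_FRAG_NEEDED, or 0 to forward. */
static uint32_t frag_needed_mtu(const struct fwd_pkt *p, uint32_t egress_mtu)
{
	if (p->len > egress_mtu && !p->gso && p->df && !p->local_df)
		return egress_mtu;  /* sender must lower its path MTU to this */
	return 0;
}
```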
| @@ -111,8 +112,8 @@ int ip_forward(struct sk_buff *skb) | |||
| 111 | 112 | ||
| 112 | skb->priority = rt_tos2priority(iph->tos); | 113 | skb->priority = rt_tos2priority(iph->tos); |
| 113 | 114 | ||
| 114 | return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev, | 115 | return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, |
| 115 | ip_forward_finish); | 116 | rt->dst.dev, ip_forward_finish); |
| 116 | 117 | ||
| 117 | sr_failed: | 118 | sr_failed: |
| 118 | /* | 119 | /* |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 86964b353c31..b7c41654dde5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
| @@ -32,6 +32,9 @@ | |||
| 32 | #include <linux/netdevice.h> | 32 | #include <linux/netdevice.h> |
| 33 | #include <linux/jhash.h> | 33 | #include <linux/jhash.h> |
| 34 | #include <linux/random.h> | 34 | #include <linux/random.h> |
| 35 | #include <linux/slab.h> | ||
| 36 | #include <net/route.h> | ||
| 37 | #include <net/dst.h> | ||
| 35 | #include <net/sock.h> | 38 | #include <net/sock.h> |
| 36 | #include <net/ip.h> | 39 | #include <net/ip.h> |
| 37 | #include <net/icmp.h> | 40 | #include <net/icmp.h> |
| @@ -121,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a) | |||
| 121 | } | 124 | } |
| 122 | 125 | ||
| 123 | /* Memory Tracking Functions. */ | 126 | /* Memory Tracking Functions. */ |
| 124 | static __inline__ void frag_kfree_skb(struct netns_frags *nf, | 127 | static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) |
| 125 | struct sk_buff *skb, int *work) | ||
| 126 | { | 128 | { |
| 127 | if (work) | ||
| 128 | *work -= skb->truesize; | ||
| 129 | atomic_sub(skb->truesize, &nf->mem); | 129 | atomic_sub(skb->truesize, &nf->mem); |
| 130 | kfree_skb(skb); | 130 | kfree_skb(skb); |
| 131 | } | 131 | } |
| @@ -205,11 +205,34 @@ static void ip_expire(unsigned long arg) | |||
| 205 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { | 205 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { |
| 206 | struct sk_buff *head = qp->q.fragments; | 206 | struct sk_buff *head = qp->q.fragments; |
| 207 | 207 | ||
| 208 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | ||
| 209 | rcu_read_lock(); | 208 | rcu_read_lock(); |
| 210 | head->dev = dev_get_by_index_rcu(net, qp->iif); | 209 | head->dev = dev_get_by_index_rcu(net, qp->iif); |
| 211 | if (head->dev) | 210 | if (!head->dev) |
| 212 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | 211 | goto out_rcu_unlock; |
| 212 | |||
| 213 | /* | ||
| 214 | * Only search the routing table for the head fragment | ||
| 215 | * when the defrag timeout fires at the PRE_ROUTING hook. | ||
| 216 | */ | ||
| 217 | if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { | ||
| 218 | const struct iphdr *iph = ip_hdr(head); | ||
| 219 | int err = ip_route_input(head, iph->daddr, iph->saddr, | ||
| 220 | iph->tos, head->dev); | ||
| 221 | if (unlikely(err)) | ||
| 222 | goto out_rcu_unlock; | ||
| 223 | |||
| 224 | /* | ||
| 225 | * Only an end host needs to send an ICMP | ||
| 226 | * "Fragment Reassembly Timeout" message, per RFC792. | ||
| 227 | */ | ||
| 228 | if (skb_rtable(head)->rt_type != RTN_LOCAL) | ||
| 229 | goto out_rcu_unlock; | ||
| 230 | |||
| 231 | } | ||
| 232 | |||
| 233 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | ||
| 234 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | ||
| 235 | out_rcu_unlock: | ||
| 213 | rcu_read_unlock(); | 236 | rcu_read_unlock(); |
| 214 | } | 237 | } |
| 215 | out: | 238 | out: |
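The rewritten timeout path encodes an RFC 792 subtlety: only an end host should emit the "Fragment Reassembly Timeout" ICMP. When conntrack defragmented the packet at PRE_ROUTING, no route is attached yet, so ip_expire() now routes the head fragment on demand and stays silent unless the route says the packet was for us (RTN_LOCAL), or if the lookup fails. As a decision function (types hypothetical):

```c
#include <stdbool.h>

enum rt_type { RT_LOCAL, RT_UNICAST, RT_ERROR };

/* Route the head fragment on demand, then decide whether this node,
 * as the destination host, should report the reassembly timeout. */
static bool should_send_frag_timeout(bool defrag_at_prerouting, bool has_route,
				     enum rt_type (*route_head)(void))
{
	if (defrag_at_prerouting && !has_route) {
		if (route_head() != RT_LOCAL)
			return false;  /* forwarding or no route: stay silent */
	}
	return true;  /* end host: send ICMP_TIME_EXCEEDED/ICMP_EXC_FRAGTIME */
}
```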
| @@ -283,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
| 283 | fp = qp->q.fragments; | 306 | fp = qp->q.fragments; |
| 284 | do { | 307 | do { |
| 285 | struct sk_buff *xp = fp->next; | 308 | struct sk_buff *xp = fp->next; |
| 286 | frag_kfree_skb(qp->q.net, fp, NULL); | 309 | frag_kfree_skb(qp->q.net, fp); |
| 287 | fp = xp; | 310 | fp = xp; |
| 288 | } while (fp); | 311 | } while (fp); |
| 289 | 312 | ||
| @@ -291,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
| 291 | qp->q.len = 0; | 314 | qp->q.len = 0; |
| 292 | qp->q.meat = 0; | 315 | qp->q.meat = 0; |
| 293 | qp->q.fragments = NULL; | 316 | qp->q.fragments = NULL; |
| 317 | qp->q.fragments_tail = NULL; | ||
| 294 | qp->iif = 0; | 318 | qp->iif = 0; |
| 295 | 319 | ||
| 296 | return 0; | 320 | return 0; |
| @@ -363,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 363 | * in the chain of fragments so far. We must know where to put | 387 | * in the chain of fragments so far. We must know where to put |
| 364 | * this fragment, right? | 388 | * this fragment, right? |
| 365 | */ | 389 | */ |
| 390 | prev = qp->q.fragments_tail; | ||
| 391 | if (!prev || FRAG_CB(prev)->offset < offset) { | ||
| 392 | next = NULL; | ||
| 393 | goto found; | ||
| 394 | } | ||
| 366 | prev = NULL; | 395 | prev = NULL; |
| 367 | for (next = qp->q.fragments; next != NULL; next = next->next) { | 396 | for (next = qp->q.fragments; next != NULL; next = next->next) { |
| 368 | if (FRAG_CB(next)->offset >= offset) | 397 | if (FRAG_CB(next)->offset >= offset) |
| @@ -370,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 370 | prev = next; | 399 | prev = next; |
| 371 | } | 400 | } |
| 372 | 401 | ||
| 402 | found: | ||
| 373 | /* We found where to put this one. Check for overlap with | 403 | /* We found where to put this one. Check for overlap with |
| 374 | * preceding fragment, and, if needed, align things so that | 404 | * preceding fragment, and, if needed, align things so that |
| 375 | * any overlaps are eliminated. | 405 | * any overlaps are eliminated. |
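The new fragments_tail pointer makes the common case — fragments arriving in order — an O(1) append instead of an O(n) walk of the chain; the linear scan survives only for out-of-order arrivals. The same fast path on a plain singly linked list:

```c
#include <stddef.h>

struct frag {
	int          offset;
	struct frag *next;
};

struct fragq {
	struct frag *head;
	struct frag *tail;   /* new: last fragment seen so far */
};

/* Find the insertion point for `offset`; *pprev gets the predecessor.
 * Returns the successor, or NULL for an append at the tail. */
static struct frag *find_slot(struct fragq *q, int offset, struct frag **pprev)
{
	struct frag *prev = q->tail, *next;

	if (!prev || prev->offset < offset) {  /* in-order arrival: O(1) */
		*pprev = prev;
		return NULL;
	}
	prev = NULL;                           /* out of order: fall back to scan */
	for (next = q->head; next; next = next->next) {
		if (next->offset >= offset)
			break;
		prev = next;
	}
	*pprev = prev;
	return next;
}
```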
| @@ -420,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 420 | qp->q.fragments = next; | 450 | qp->q.fragments = next; |
| 421 | 451 | ||
| 422 | qp->q.meat -= free_it->len; | 452 | qp->q.meat -= free_it->len; |
| 423 | frag_kfree_skb(qp->q.net, free_it, NULL); | 453 | frag_kfree_skb(qp->q.net, free_it); |
| 424 | } | 454 | } |
| 425 | } | 455 | } |
| 426 | 456 | ||
| @@ -428,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 428 | 458 | ||
| 429 | /* Insert this fragment in the chain of fragments. */ | 459 | /* Insert this fragment in the chain of fragments. */ |
| 430 | skb->next = next; | 460 | skb->next = next; |
| 461 | if (!next) | ||
| 462 | qp->q.fragments_tail = skb; | ||
| 431 | if (prev) | 463 | if (prev) |
| 432 | prev->next = skb; | 464 | prev->next = skb; |
| 433 | else | 465 | else |
| @@ -481,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
| 481 | goto out_nomem; | 513 | goto out_nomem; |
| 482 | 514 | ||
| 483 | fp->next = head->next; | 515 | fp->next = head->next; |
| 516 | if (!fp->next) | ||
| 517 | qp->q.fragments_tail = fp; | ||
| 484 | prev->next = fp; | 518 | prev->next = fp; |
| 485 | 519 | ||
| 486 | skb_morph(head, qp->q.fragments); | 520 | skb_morph(head, qp->q.fragments); |
| @@ -530,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
| 530 | 564 | ||
| 531 | skb_shinfo(head)->frag_list = head->next; | 565 | skb_shinfo(head)->frag_list = head->next; |
| 532 | skb_push(head, head->data - skb_network_header(head)); | 566 | skb_push(head, head->data - skb_network_header(head)); |
| 533 | atomic_sub(head->truesize, &qp->q.net->mem); | ||
| 534 | 567 | ||
| 535 | for (fp=head->next; fp; fp = fp->next) { | 568 | for (fp=head->next; fp; fp = fp->next) { |
| 536 | head->data_len += fp->len; | 569 | head->data_len += fp->len; |
| @@ -540,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
| 540 | else if (head->ip_summed == CHECKSUM_COMPLETE) | 573 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
| 541 | head->csum = csum_add(head->csum, fp->csum); | 574 | head->csum = csum_add(head->csum, fp->csum); |
| 542 | head->truesize += fp->truesize; | 575 | head->truesize += fp->truesize; |
| 543 | atomic_sub(fp->truesize, &qp->q.net->mem); | ||
| 544 | } | 576 | } |
| 577 | atomic_sub(head->truesize, &qp->q.net->mem); | ||
| 545 | 578 | ||
| 546 | head->next = NULL; | 579 | head->next = NULL; |
| 547 | head->dev = dev; | 580 | head->dev = dev; |
| @@ -552,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
| 552 | iph->tot_len = htons(len); | 585 | iph->tot_len = htons(len); |
| 553 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 586 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
| 554 | qp->q.fragments = NULL; | 587 | qp->q.fragments = NULL; |
| 588 | qp->q.fragments_tail = NULL; | ||
| 555 | return 0; | 589 | return 0; |
| 556 | 590 | ||
| 557 | out_nomem: | 591 | out_nomem: |
| @@ -598,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) | |||
| 598 | kfree_skb(skb); | 632 | kfree_skb(skb); |
| 599 | return -ENOMEM; | 633 | return -ENOMEM; |
| 600 | } | 634 | } |
| 635 | EXPORT_SYMBOL(ip_defrag); | ||
| 601 | 636 | ||
| 602 | #ifdef CONFIG_SYSCTL | 637 | #ifdef CONFIG_SYSCTL |
| 603 | static int zero; | 638 | static int zero; |
| @@ -646,7 +681,7 @@ static struct ctl_table ip4_frags_ctl_table[] = { | |||
| 646 | { } | 681 | { } |
| 647 | }; | 682 | }; |
| 648 | 683 | ||
| 649 | static int ip4_frags_ns_ctl_register(struct net *net) | 684 | static int __net_init ip4_frags_ns_ctl_register(struct net *net) |
| 650 | { | 685 | { |
| 651 | struct ctl_table *table; | 686 | struct ctl_table *table; |
| 652 | struct ctl_table_header *hdr; | 687 | struct ctl_table_header *hdr; |
| @@ -676,7 +711,7 @@ err_alloc: | |||
| 676 | return -ENOMEM; | 711 | return -ENOMEM; |
| 677 | } | 712 | } |
| 678 | 713 | ||
| 679 | static void ip4_frags_ns_ctl_unregister(struct net *net) | 714 | static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) |
| 680 | { | 715 | { |
| 681 | struct ctl_table *table; | 716 | struct ctl_table *table; |
| 682 | 717 | ||
| @@ -704,7 +739,7 @@ static inline void ip4_frags_ctl_register(void) | |||
| 704 | } | 739 | } |
| 705 | #endif | 740 | #endif |
| 706 | 741 | ||
| 707 | static int ipv4_frags_init_net(struct net *net) | 742 | static int __net_init ipv4_frags_init_net(struct net *net) |
| 708 | { | 743 | { |
| 709 | /* | 744 | /* |
| 710 | * Fragment cache limits. We will commit 256K at one time. Should we | 745 | * Fragment cache limits. We will commit 256K at one time. Should we |
| @@ -726,7 +761,7 @@ static int ipv4_frags_init_net(struct net *net) | |||
| 726 | return ip4_frags_ns_ctl_register(net); | 761 | return ip4_frags_ns_ctl_register(net); |
| 727 | } | 762 | } |
| 728 | 763 | ||
| 729 | static void ipv4_frags_exit_net(struct net *net) | 764 | static void __net_exit ipv4_frags_exit_net(struct net *net) |
| 730 | { | 765 | { |
| 731 | ip4_frags_ns_ctl_unregister(net); | 766 | ip4_frags_ns_ctl_unregister(net); |
| 732 | inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); | 767 | inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); |
| @@ -751,5 +786,3 @@ void __init ipfrag_init(void) | |||
| 751 | ip4_frags.secret_interval = 10 * 60 * HZ; | 786 | ip4_frags.secret_interval = 10 * 60 * HZ; |
| 752 | inet_frags_init(&ip4_frags); | 787 | inet_frags_init(&ip4_frags); |
| 753 | } | 788 | } |
| 754 | |||
| 755 | EXPORT_SYMBOL(ip_defrag); | ||
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f36ce156cac6..35c93e8b6a46 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
| 16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
| 17 | #include <linux/slab.h> | ||
| 17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
| 18 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
| 19 | #include <linux/netdevice.h> | 20 | #include <linux/netdevice.h> |
| @@ -44,7 +45,7 @@ | |||
| 44 | #include <net/netns/generic.h> | 45 | #include <net/netns/generic.h> |
| 45 | #include <net/rtnetlink.h> | 46 | #include <net/rtnetlink.h> |
| 46 | 47 | ||
| 47 | #ifdef CONFIG_IPV6 | 48 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 48 | #include <net/ipv6.h> | 49 | #include <net/ipv6.h> |
| 49 | #include <net/ip6_fib.h> | 50 | #include <net/ip6_fib.h> |
| 50 | #include <net/ip6_route.h> | 51 | #include <net/ip6_route.h> |
| @@ -501,7 +502,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
| 501 | t->err_time = jiffies; | 502 | t->err_time = jiffies; |
| 502 | out: | 503 | out: |
| 503 | rcu_read_unlock(); | 504 | rcu_read_unlock(); |
| 504 | return; | ||
| 505 | } | 505 | } |
| 506 | 506 | ||
| 507 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 507 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) |
| @@ -537,7 +537,6 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
| 537 | struct ip_tunnel *tunnel; | 537 | struct ip_tunnel *tunnel; |
| 538 | int offset = 4; | 538 | int offset = 4; |
| 539 | __be16 gre_proto; | 539 | __be16 gre_proto; |
| 540 | unsigned int len; | ||
| 541 | 540 | ||
| 542 | if (!pskb_may_pull(skb, 16)) | 541 | if (!pskb_may_pull(skb, 16)) |
| 543 | goto drop_nolock; | 542 | goto drop_nolock; |
| @@ -628,8 +627,6 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
| 628 | tunnel->i_seqno = seqno + 1; | 627 | tunnel->i_seqno = seqno + 1; |
| 629 | } | 628 | } |
| 630 | 629 | ||
| 631 | len = skb->len; | ||
| 632 | |||
| 633 | /* Warning: All skb pointers will be invalidated! */ | 630 | /* Warning: All skb pointers will be invalidated! */ |
| 634 | if (tunnel->dev->type == ARPHRD_ETHER) { | 631 | if (tunnel->dev->type == ARPHRD_ETHER) { |
| 635 | if (!pskb_may_pull(skb, ETH_HLEN)) { | 632 | if (!pskb_may_pull(skb, ETH_HLEN)) { |
| @@ -643,11 +640,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
| 643 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | 640 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); |
| 644 | } | 641 | } |
| 645 | 642 | ||
| 646 | stats->rx_packets++; | 643 | skb_tunnel_rx(skb, tunnel->dev); |
| 647 | stats->rx_bytes += len; | ||
| 648 | skb->dev = tunnel->dev; | ||
| 649 | skb_dst_drop(skb); | ||
| 650 | nf_reset(skb); | ||
| 651 | 644 | ||
| 652 | skb_reset_network_header(skb); | 645 | skb_reset_network_header(skb); |
| 653 | ipgre_ecn_decapsulate(iph, skb); | 646 | ipgre_ecn_decapsulate(iph, skb); |
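The receive-path cleanup above replaces five open-coded lines — rx stats, retargeting skb->dev, dropping the stale dst, resetting conntrack — with one skb_tunnel_rx() call that tunnel drivers can share. Roughly what such a helper bundles, judging from the lines this hunk deletes (structs abbreviated and hypothetical; the real helpers also release references):

```c
#include <stddef.h>

struct net_device;                 /* opaque here */
struct sk_buff {
	unsigned int       len;
	struct net_device *dev;
	void              *dst;    /* cached route from the outer packet */
	void              *nfct;   /* conntrack state, likewise stale */
};

struct rx_stats { unsigned long rx_packets, rx_bytes; };
extern struct rx_stats *dev_stats(struct net_device *dev);

/* Re-home a decapsulated skb onto the tunnel device: account it,
 * then shed all state that belonged to the outer packet. */
static void tunnel_rx(struct sk_buff *skb, struct net_device *dev)
{
	struct rx_stats *st = dev_stats(dev);

	st->rx_packets++;
	st->rx_bytes += skb->len;
	skb->dev  = dev;
	skb->dst  = NULL;          /* skb_dst_drop() */
	skb->nfct = NULL;          /* nf_reset() */
}
```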
| @@ -706,7 +699,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 706 | if ((dst = rt->rt_gateway) == 0) | 699 | if ((dst = rt->rt_gateway) == 0) |
| 707 | goto tx_error_icmp; | 700 | goto tx_error_icmp; |
| 708 | } | 701 | } |
| 709 | #ifdef CONFIG_IPV6 | 702 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 710 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 703 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
| 711 | struct in6_addr *addr6; | 704 | struct in6_addr *addr6; |
| 712 | int addr_type; | 705 | int addr_type; |
| @@ -738,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 738 | tos = 0; | 731 | tos = 0; |
| 739 | if (skb->protocol == htons(ETH_P_IP)) | 732 | if (skb->protocol == htons(ETH_P_IP)) |
| 740 | tos = old_iph->tos; | 733 | tos = old_iph->tos; |
| 734 | else if (skb->protocol == htons(ETH_P_IPV6)) | ||
| 735 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | ||
| 741 | } | 736 | } |
| 742 | 737 | ||
| 743 | { | 738 | { |
| @@ -752,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 752 | goto tx_error; | 747 | goto tx_error; |
| 753 | } | 748 | } |
| 754 | } | 749 | } |
| 755 | tdev = rt->u.dst.dev; | 750 | tdev = rt->dst.dev; |
| 756 | 751 | ||
| 757 | if (tdev == dev) { | 752 | if (tdev == dev) { |
| 758 | ip_rt_put(rt); | 753 | ip_rt_put(rt); |
| @@ -762,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 762 | 757 | ||
| 763 | df = tiph->frag_off; | 758 | df = tiph->frag_off; |
| 764 | if (df) | 759 | if (df) |
| 765 | mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; | 760 | mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; |
| 766 | else | 761 | else |
| 767 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; | 762 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
| 768 | 763 | ||
| @@ -779,7 +774,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 779 | goto tx_error; | 774 | goto tx_error; |
| 780 | } | 775 | } |
| 781 | } | 776 | } |
| 782 | #ifdef CONFIG_IPV6 | 777 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 783 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 778 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
| 784 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); | 779 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); |
| 785 | 780 | ||
| @@ -793,7 +788,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 793 | } | 788 | } |
| 794 | 789 | ||
| 795 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { | 790 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { |
| 796 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); | 791 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
| 797 | ip_rt_put(rt); | 792 | ip_rt_put(rt); |
| 798 | goto tx_error; | 793 | goto tx_error; |
| 799 | } | 794 | } |
| @@ -810,11 +805,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 810 | tunnel->err_count = 0; | 805 | tunnel->err_count = 0; |
| 811 | } | 806 | } |
| 812 | 807 | ||
| 813 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; | 808 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; |
| 814 | 809 | ||
| 815 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| | 810 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| |
| 816 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 811 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
| 817 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 812 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
| 813 | if (max_headroom > dev->needed_headroom) | ||
| 814 | dev->needed_headroom = max_headroom; | ||
| 818 | if (!new_skb) { | 815 | if (!new_skb) { |
| 819 | ip_rt_put(rt); | 816 | ip_rt_put(rt); |
| 820 | txq->tx_dropped++; | 817 | txq->tx_dropped++; |
| @@ -835,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 835 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 832 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
| 836 | IPSKB_REROUTED); | 833 | IPSKB_REROUTED); |
| 837 | skb_dst_drop(skb); | 834 | skb_dst_drop(skb); |
| 838 | skb_dst_set(skb, &rt->u.dst); | 835 | skb_dst_set(skb, &rt->dst); |
| 839 | 836 | ||
| 840 | /* | 837 | /* |
| 841 | * Push down and install the IPIP header. | 838 | * Push down and install the IPIP header. |
| @@ -853,12 +850,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 853 | if ((iph->ttl = tiph->ttl) == 0) { | 850 | if ((iph->ttl = tiph->ttl) == 0) { |
| 854 | if (skb->protocol == htons(ETH_P_IP)) | 851 | if (skb->protocol == htons(ETH_P_IP)) |
| 855 | iph->ttl = old_iph->ttl; | 852 | iph->ttl = old_iph->ttl; |
| 856 | #ifdef CONFIG_IPV6 | 853 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 857 | else if (skb->protocol == htons(ETH_P_IPV6)) | 854 | else if (skb->protocol == htons(ETH_P_IPV6)) |
| 858 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; | 855 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; |
| 859 | #endif | 856 | #endif |
| 860 | else | 857 | else |
| 861 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); | 858 | iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); |
| 862 | } | 859 | } |
| 863 | 860 | ||
| 864 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; | 861 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; |
| @@ -920,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
| 920 | .proto = IPPROTO_GRE }; | 917 | .proto = IPPROTO_GRE }; |
| 921 | struct rtable *rt; | 918 | struct rtable *rt; |
| 922 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 919 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
| 923 | tdev = rt->u.dst.dev; | 920 | tdev = rt->dst.dev; |
| 924 | ip_rt_put(rt); | 921 | ip_rt_put(rt); |
| 925 | } | 922 | } |
| 926 | 923 | ||
| @@ -1144,12 +1141,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | |||
| 1144 | 1141 | ||
| 1145 | if (saddr) | 1142 | if (saddr) |
| 1146 | memcpy(&iph->saddr, saddr, 4); | 1143 | memcpy(&iph->saddr, saddr, 4); |
| 1147 | 1144 | if (daddr) | |
| 1148 | if (daddr) { | ||
| 1149 | memcpy(&iph->daddr, daddr, 4); | 1145 | memcpy(&iph->daddr, daddr, 4); |
| 1150 | return t->hlen; | 1146 | if (iph->daddr) |
| 1151 | } | ||
| 1152 | if (iph->daddr && !ipv4_is_multicast(iph->daddr)) | ||
| 1153 | return t->hlen; | 1147 | return t->hlen; |
| 1154 | 1148 | ||
| 1155 | return -t->hlen; | 1149 | return -t->hlen; |
| @@ -1182,7 +1176,7 @@ static int ipgre_open(struct net_device *dev) | |||
| 1182 | struct rtable *rt; | 1176 | struct rtable *rt; |
| 1183 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) | 1177 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) |
| 1184 | return -EADDRNOTAVAIL; | 1178 | return -EADDRNOTAVAIL; |
| 1185 | dev = rt->u.dst.dev; | 1179 | dev = rt->dst.dev; |
| 1186 | ip_rt_put(rt); | 1180 | ip_rt_put(rt); |
| 1187 | if (__in_dev_get_rtnl(dev) == NULL) | 1181 | if (__in_dev_get_rtnl(dev) == NULL) |
| 1188 | return -EADDRNOTAVAIL; | 1182 | return -EADDRNOTAVAIL; |
| @@ -1307,7 +1301,7 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) | |||
| 1307 | } | 1301 | } |
| 1308 | } | 1302 | } |
| 1309 | 1303 | ||
| 1310 | static int ipgre_init_net(struct net *net) | 1304 | static int __net_init ipgre_init_net(struct net *net) |
| 1311 | { | 1305 | { |
| 1312 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 1306 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
| 1313 | int err; | 1307 | int err; |
| @@ -1334,7 +1328,7 @@ err_alloc_dev: | |||
| 1334 | return err; | 1328 | return err; |
| 1335 | } | 1329 | } |
| 1336 | 1330 | ||
| 1337 | static void ipgre_exit_net(struct net *net) | 1331 | static void __net_exit ipgre_exit_net(struct net *net) |
| 1338 | { | 1332 | { |
| 1339 | struct ipgre_net *ign; | 1333 | struct ipgre_net *ign; |
| 1340 | LIST_HEAD(list); | 1334 | LIST_HEAD(list); |
| @@ -1665,14 +1659,15 @@ static int __init ipgre_init(void) | |||
| 1665 | 1659 | ||
| 1666 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); | 1660 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); |
| 1667 | 1661 | ||
| 1668 | if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { | ||
| 1669 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | ||
| 1670 | return -EAGAIN; | ||
| 1671 | } | ||
| 1672 | |||
| 1673 | err = register_pernet_device(&ipgre_net_ops); | 1662 | err = register_pernet_device(&ipgre_net_ops); |
| 1674 | if (err < 0) | 1663 | if (err < 0) |
| 1675 | goto gen_device_failed; | 1664 | return err; |
| 1665 | |||
| 1666 | err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); | ||
| 1667 | if (err < 0) { | ||
| 1668 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | ||
| 1669 | goto add_proto_failed; | ||
| 1670 | } | ||
| 1676 | 1671 | ||
| 1677 | err = rtnl_link_register(&ipgre_link_ops); | 1672 | err = rtnl_link_register(&ipgre_link_ops); |
| 1678 | if (err < 0) | 1673 | if (err < 0) |
| @@ -1688,9 +1683,9 @@ out: | |||
| 1688 | tap_ops_failed: | 1683 | tap_ops_failed: |
| 1689 | rtnl_link_unregister(&ipgre_link_ops); | 1684 | rtnl_link_unregister(&ipgre_link_ops); |
| 1690 | rtnl_link_failed: | 1685 | rtnl_link_failed: |
| 1691 | unregister_pernet_device(&ipgre_net_ops); | ||
| 1692 | gen_device_failed: | ||
| 1693 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); | 1686 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); |
| 1687 | add_proto_failed: | ||
| 1688 | unregister_pernet_device(&ipgre_net_ops); | ||
| 1694 | goto out; | 1689 | goto out; |
| 1695 | } | 1690 | } |
| 1696 | 1691 | ||
| @@ -1698,9 +1693,9 @@ static void __exit ipgre_fini(void) | |||
| 1698 | { | 1693 | { |
| 1699 | rtnl_link_unregister(&ipgre_tap_ops); | 1694 | rtnl_link_unregister(&ipgre_tap_ops); |
| 1700 | rtnl_link_unregister(&ipgre_link_ops); | 1695 | rtnl_link_unregister(&ipgre_link_ops); |
| 1701 | unregister_pernet_device(&ipgre_net_ops); | ||
| 1702 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) | 1696 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) |
| 1703 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); | 1697 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); |
| 1698 | unregister_pernet_device(&ipgre_net_ops); | ||
| 1704 | } | 1699 | } |
| 1705 | 1700 | ||
| 1706 | module_init(ipgre_init); | 1701 | module_init(ipgre_init); |
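The init/exit reshuffle restores LIFO teardown ordering: per-net state is registered before the GRE protocol handler is hooked up, and on the way down (or on a partial-init failure) the handler is removed first, so no packet can race into the receive path and look up per-net data that unregister_pernet_device() is about to free. The general shape, with example_* as stand-in names:

	static int __init example_init(void)
	{
		int err;

		err = register_pernet_device(&example_net_ops);	/* state first */
		if (err < 0)
			return err;

		err = inet_add_protocol(&example_protocol, IPPROTO_GRE);
		if (err < 0)
			goto add_proto_failed;	/* unwind in reverse order */
		return 0;

	add_proto_failed:
		unregister_pernet_device(&example_net_ops);
		return err;
	}

	static void __exit example_fini(void)
	{
		/* mirror image: stop packet delivery before freeing the
		 * state it depends on */
		inet_del_protocol(&example_protocol, IPPROTO_GRE);
		unregister_pernet_device(&example_net_ops);
	}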
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c29de9879fda..d859bcc26cb7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
| @@ -119,6 +119,7 @@ | |||
| 119 | #include <linux/kernel.h> | 119 | #include <linux/kernel.h> |
| 120 | #include <linux/string.h> | 120 | #include <linux/string.h> |
| 121 | #include <linux/errno.h> | 121 | #include <linux/errno.h> |
| 122 | #include <linux/slab.h> | ||
| 122 | 123 | ||
| 123 | #include <linux/net.h> | 124 | #include <linux/net.h> |
| 124 | #include <linux/socket.h> | 125 | #include <linux/socket.h> |
| @@ -145,7 +146,7 @@ | |||
| 145 | #include <linux/netlink.h> | 146 | #include <linux/netlink.h> |
| 146 | 147 | ||
| 147 | /* | 148 | /* |
| 148 | * Process Router Attention IP option | 149 | * Process Router Attention IP option (RFC 2113) |
| 149 | */ | 150 | */ |
| 150 | int ip_call_ra_chain(struct sk_buff *skb) | 151 | int ip_call_ra_chain(struct sk_buff *skb) |
| 151 | { | 152 | { |
| @@ -154,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
| 154 | struct sock *last = NULL; | 155 | struct sock *last = NULL; |
| 155 | struct net_device *dev = skb->dev; | 156 | struct net_device *dev = skb->dev; |
| 156 | 157 | ||
| 157 | read_lock(&ip_ra_lock); | 158 | for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { |
| 158 | for (ra = ip_ra_chain; ra; ra = ra->next) { | ||
| 159 | struct sock *sk = ra->sk; | 159 | struct sock *sk = ra->sk; |
| 160 | 160 | ||
| 161 | /* If socket is bound to an interface, only report | 161 | /* If socket is bound to an interface, only report |
| @@ -166,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
| 166 | sk->sk_bound_dev_if == dev->ifindex) && | 166 | sk->sk_bound_dev_if == dev->ifindex) && |
| 167 | net_eq(sock_net(sk), dev_net(dev))) { | 167 | net_eq(sock_net(sk), dev_net(dev))) { |
| 168 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 168 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
| 169 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { | 169 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) |
| 170 | read_unlock(&ip_ra_lock); | ||
| 171 | return 1; | 170 | return 1; |
| 172 | } | ||
| 173 | } | 171 | } |
| 174 | if (last) { | 172 | if (last) { |
| 175 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 173 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
| @@ -182,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
| 182 | 180 | ||
| 183 | if (last) { | 181 | if (last) { |
| 184 | raw_rcv(last, skb); | 182 | raw_rcv(last, skb); |
| 185 | read_unlock(&ip_ra_lock); | ||
| 186 | return 1; | 183 | return 1; |
| 187 | } | 184 | } |
| 188 | read_unlock(&ip_ra_lock); | ||
| 189 | return 0; | 185 | return 0; |
| 190 | } | 186 | } |
| 191 | 187 | ||
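With the rwlock gone, ip_call_ra_chain() relies on the RCU read-side section that already covers the IPv4 input path: rcu_dereference() makes each pointer load safe against concurrent unlinking, and the matching writer (see the ip_sockglue.c hunks further down) frees entries only after a grace period. The read-side shape over a generic singly linked list, as a sketch:

	#include <linux/rcupdate.h>

	struct ra_node {
		struct ra_node __rcu	*next;
		struct sock		*sk;	/* NULL once unlinked */
	};

	static struct ra_node __rcu *ra_head;

	/* Lock-free reader: the caller must be inside rcu_read_lock(),
	 * which the softirq packet-processing path already guarantees. */
	static struct sock *ra_first_sock(void)
	{
		struct ra_node *n;

		for (n = rcu_dereference(ra_head); n;
		     n = rcu_dereference(n->next))
			if (n->sk)
				return n->sk;
		return NULL;
	}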
| @@ -265,7 +261,7 @@ int ip_local_deliver(struct sk_buff *skb) | |||
| 265 | return 0; | 261 | return 0; |
| 266 | } | 262 | } |
| 267 | 263 | ||
| 268 | return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL, | 264 | return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL, |
| 269 | ip_local_deliver_finish); | 265 | ip_local_deliver_finish); |
| 270 | } | 266 | } |
| 271 | 267 | ||
| @@ -297,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb) | |||
| 297 | } | 293 | } |
| 298 | 294 | ||
| 299 | if (unlikely(opt->srr)) { | 295 | if (unlikely(opt->srr)) { |
| 300 | struct in_device *in_dev = in_dev_get(dev); | 296 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 297 | |||
| 301 | if (in_dev) { | 298 | if (in_dev) { |
| 302 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | 299 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { |
| 303 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 300 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
| 304 | net_ratelimit()) | 301 | net_ratelimit()) |
| 305 | printk(KERN_INFO "source route option %pI4 -> %pI4\n", | 302 | printk(KERN_INFO "source route option %pI4 -> %pI4\n", |
| 306 | &iph->saddr, &iph->daddr); | 303 | &iph->saddr, &iph->daddr); |
| 307 | in_dev_put(in_dev); | ||
| 308 | goto drop; | 304 | goto drop; |
| 309 | } | 305 | } |
| 310 | |||
| 311 | in_dev_put(in_dev); | ||
| 312 | } | 306 | } |
| 313 | 307 | ||
| 314 | if (ip_options_rcv_srr(skb)) | 308 | if (ip_options_rcv_srr(skb)) |
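The in_device change above follows the same logic: ip_rcv_options() runs entirely inside the input path's RCU section, so the refcounted in_dev_get()/in_dev_put() pair (two atomic operations per options-carrying packet) can become a plain __in_dev_get_rcu(), whose result is valid exactly as long as it is used. The two idioms side by side, sketched:

	#include <linux/inetdevice.h>

	static bool srr_allowed_refcounted(struct net_device *dev)
	{
		struct in_device *in_dev = in_dev_get(dev);	/* atomic inc */
		bool ok = false;

		if (in_dev) {
			ok = IN_DEV_SOURCE_ROUTE(in_dev);
			in_dev_put(in_dev);			/* atomic dec */
		}
		return ok;
	}

	static bool srr_allowed_rcu(struct net_device *dev)
	{
		/* caller holds rcu_read_lock(); no refcount traffic */
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		return in_dev && IN_DEV_SOURCE_ROUTE(in_dev);
	}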
| @@ -330,8 +324,8 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
| 330 | * how the packet travels inside Linux networking. | 324 | * how the packet travels inside Linux networking. |
| 331 | */ | 325 | */ |
| 332 | if (skb_dst(skb) == NULL) { | 326 | if (skb_dst(skb) == NULL) { |
| 333 | int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, | 327 | int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, |
| 334 | skb->dev); | 328 | iph->tos, skb->dev); |
| 335 | if (unlikely(err)) { | 329 | if (unlikely(err)) { |
| 336 | if (err == -EHOSTUNREACH) | 330 | if (err == -EHOSTUNREACH) |
| 337 | IP_INC_STATS_BH(dev_net(skb->dev), | 331 | IP_INC_STATS_BH(dev_net(skb->dev), |
| @@ -339,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
| 339 | else if (err == -ENETUNREACH) | 333 | else if (err == -ENETUNREACH) |
| 340 | IP_INC_STATS_BH(dev_net(skb->dev), | 334 | IP_INC_STATS_BH(dev_net(skb->dev), |
| 341 | IPSTATS_MIB_INNOROUTES); | 335 | IPSTATS_MIB_INNOROUTES); |
| 336 | else if (err == -EXDEV) | ||
| 337 | NET_INC_STATS_BH(dev_net(skb->dev), | ||
| 338 | LINUX_MIB_IPRPFILTER); | ||
| 342 | goto drop; | 339 | goto drop; |
| 343 | } | 340 | } |
| 344 | } | 341 | } |
| 345 | 342 | ||
| 346 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_NET_CLS_ROUTE |
| 347 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
| 348 | struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
| 349 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
| 350 | st[idx&0xFF].o_packets++; | 347 | st[idx&0xFF].o_packets++; |
| 351 | st[idx&0xFF].o_bytes += skb->len; | 348 | st[idx&0xFF].o_bytes += skb->len; |
| @@ -359,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
| 359 | 356 | ||
| 360 | rt = skb_rtable(skb); | 357 | rt = skb_rtable(skb); |
| 361 | if (rt->rt_type == RTN_MULTICAST) { | 358 | if (rt->rt_type == RTN_MULTICAST) { |
| 362 | IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, | 359 | IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, |
| 363 | skb->len); | 360 | skb->len); |
| 364 | } else if (rt->rt_type == RTN_BROADCAST) | 361 | } else if (rt->rt_type == RTN_BROADCAST) |
| 365 | IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, | 362 | IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, |
| 366 | skb->len); | 363 | skb->len); |
| 367 | 364 | ||
| 368 | return dst_input(skb); | 365 | return dst_input(skb); |
| @@ -443,7 +440,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, | |||
| 443 | /* Must drop socket now because of tproxy. */ | 440 | /* Must drop socket now because of tproxy. */ |
| 444 | skb_orphan(skb); | 441 | skb_orphan(skb); |
| 445 | 442 | ||
| 446 | return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, | 443 | return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, |
| 447 | ip_rcv_finish); | 444 | ip_rcv_finish); |
| 448 | 445 | ||
| 449 | inhdr_error: | 446 | inhdr_error: |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 94bf105ef3c9..ba9836c488ed 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | #include <linux/capability.h> | 12 | #include <linux/capability.h> |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/slab.h> | ||
| 14 | #include <linux/types.h> | 15 | #include <linux/types.h> |
| 15 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
| 16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
| @@ -237,7 +238,6 @@ void ip_options_fragment(struct sk_buff * skb) | |||
| 237 | opt->rr_needaddr = 0; | 238 | opt->rr_needaddr = 0; |
| 238 | opt->ts_needaddr = 0; | 239 | opt->ts_needaddr = 0; |
| 239 | opt->ts_needtime = 0; | 240 | opt->ts_needtime = 0; |
| 240 | return; | ||
| 241 | } | 241 | } |
| 242 | 242 | ||
| 243 | /* | 243 | /* |
| @@ -600,6 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
| 600 | unsigned char *optptr = skb_network_header(skb) + opt->srr; | 600 | unsigned char *optptr = skb_network_header(skb) + opt->srr; |
| 601 | struct rtable *rt = skb_rtable(skb); | 601 | struct rtable *rt = skb_rtable(skb); |
| 602 | struct rtable *rt2; | 602 | struct rtable *rt2; |
| 603 | unsigned long orefdst; | ||
| 603 | int err; | 604 | int err; |
| 604 | 605 | ||
| 605 | if (!opt->srr) | 606 | if (!opt->srr) |
| @@ -623,16 +624,16 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
| 623 | } | 624 | } |
| 624 | memcpy(&nexthop, &optptr[srrptr-1], 4); | 625 | memcpy(&nexthop, &optptr[srrptr-1], 4); |
| 625 | 626 | ||
| 626 | rt = skb_rtable(skb); | 627 | orefdst = skb->_skb_refdst; |
| 627 | skb_dst_set(skb, NULL); | 628 | skb_dst_set(skb, NULL); |
| 628 | err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); | 629 | err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); |
| 629 | rt2 = skb_rtable(skb); | 630 | rt2 = skb_rtable(skb); |
| 630 | if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { | 631 | if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { |
| 631 | ip_rt_put(rt2); | 632 | skb_dst_drop(skb); |
| 632 | skb_dst_set(skb, &rt->u.dst); | 633 | skb->_skb_refdst = orefdst; |
| 633 | return -EINVAL; | 634 | return -EINVAL; |
| 634 | } | 635 | } |
| 635 | ip_rt_put(rt); | 636 | refdst_drop(orefdst); |
| 636 | if (rt2->rt_type != RTN_LOCAL) | 637 | if (rt2->rt_type != RTN_LOCAL) |
| 637 | break; | 638 | break; |
| 638 | /* Superfast 8) loopback forward */ | 639 | /* Superfast 8) loopback forward */ |
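The orefdst choreography above replaces the old route juggling: skb->_skb_refdst packs the dst pointer together with a no-reference flag bit, so saving and restoring the raw word preserves both the route and its refcounting mode. ip_route_input() installs a candidate route; on rejection the candidate is dropped and the saved word is restored verbatim, on success refdst_drop() releases the old route. Extracted as a standalone helper, essentially the hunk above:

	static int reroute_or_keep(struct sk_buff *skb, __be32 nexthop,
				   __be32 saddr, u8 tos)
	{
		unsigned long orefdst = skb->_skb_refdst; /* ptr + noref bit */
		struct rtable *rt2;
		int err;

		skb_dst_set(skb, NULL);
		err = ip_route_input(skb, nexthop, saddr, tos, skb->dev);
		rt2 = skb_rtable(skb);
		if (err || (rt2->rt_type != RTN_UNICAST &&
			    rt2->rt_type != RTN_LOCAL)) {
			skb_dst_drop(skb);		/* drop the candidate */
			skb->_skb_refdst = orefdst;	/* restore original */
			return -EINVAL;
		}
		refdst_drop(orefdst);		/* keep new, release old */
		return 0;
	}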
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3451799e3dbf..7649d7750075 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <linux/string.h> | 51 | #include <linux/string.h> |
| 52 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
| 53 | #include <linux/highmem.h> | 53 | #include <linux/highmem.h> |
| 54 | #include <linux/slab.h> | ||
| 54 | 55 | ||
| 55 | #include <linux/socket.h> | 56 | #include <linux/socket.h> |
| 56 | #include <linux/sockios.h> | 57 | #include <linux/sockios.h> |
| @@ -88,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph) | |||
| 88 | iph->check = 0; | 89 | iph->check = 0; |
| 89 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | 90 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); |
| 90 | } | 91 | } |
| 92 | EXPORT_SYMBOL(ip_send_check); | ||
| 91 | 93 | ||
| 92 | int __ip_local_out(struct sk_buff *skb) | 94 | int __ip_local_out(struct sk_buff *skb) |
| 93 | { | 95 | { |
| @@ -95,8 +97,8 @@ int __ip_local_out(struct sk_buff *skb) | |||
| 95 | 97 | ||
| 96 | iph->tot_len = htons(skb->len); | 98 | iph->tot_len = htons(skb->len); |
| 97 | ip_send_check(iph); | 99 | ip_send_check(iph); |
| 98 | return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, | 100 | return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, |
| 99 | dst_output); | 101 | skb_dst(skb)->dev, dst_output); |
| 100 | } | 102 | } |
| 101 | 103 | ||
| 102 | int ip_local_out(struct sk_buff *skb) | 104 | int ip_local_out(struct sk_buff *skb) |
| @@ -119,7 +121,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) | |||
| 119 | newskb->pkt_type = PACKET_LOOPBACK; | 121 | newskb->pkt_type = PACKET_LOOPBACK; |
| 120 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | 122 | newskb->ip_summed = CHECKSUM_UNNECESSARY; |
| 121 | WARN_ON(!skb_dst(newskb)); | 123 | WARN_ON(!skb_dst(newskb)); |
| 122 | netif_rx(newskb); | 124 | netif_rx_ni(newskb); |
| 123 | return 0; | 125 | return 0; |
| 124 | } | 126 | } |
| 125 | 127 | ||
| @@ -150,15 +152,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
| 150 | iph->version = 4; | 152 | iph->version = 4; |
| 151 | iph->ihl = 5; | 153 | iph->ihl = 5; |
| 152 | iph->tos = inet->tos; | 154 | iph->tos = inet->tos; |
| 153 | if (ip_dont_fragment(sk, &rt->u.dst)) | 155 | if (ip_dont_fragment(sk, &rt->dst)) |
| 154 | iph->frag_off = htons(IP_DF); | 156 | iph->frag_off = htons(IP_DF); |
| 155 | else | 157 | else |
| 156 | iph->frag_off = 0; | 158 | iph->frag_off = 0; |
| 157 | iph->ttl = ip_select_ttl(inet, &rt->u.dst); | 159 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
| 158 | iph->daddr = rt->rt_dst; | 160 | iph->daddr = rt->rt_dst; |
| 159 | iph->saddr = rt->rt_src; | 161 | iph->saddr = rt->rt_src; |
| 160 | iph->protocol = sk->sk_protocol; | 162 | iph->protocol = sk->sk_protocol; |
| 161 | ip_select_ident(iph, &rt->u.dst, sk); | 163 | ip_select_ident(iph, &rt->dst, sk); |
| 162 | 164 | ||
| 163 | if (opt && opt->optlen) { | 165 | if (opt && opt->optlen) { |
| 164 | iph->ihl += opt->optlen>>2; | 166 | iph->ihl += opt->optlen>>2; |
| @@ -171,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
| 171 | /* Send it out. */ | 173 | /* Send it out. */ |
| 172 | return ip_local_out(skb); | 174 | return ip_local_out(skb); |
| 173 | } | 175 | } |
| 174 | |||
| 175 | EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); | 176 | EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); |
| 176 | 177 | ||
| 177 | static inline int ip_finish_output2(struct sk_buff *skb) | 178 | static inline int ip_finish_output2(struct sk_buff *skb) |
| @@ -239,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
| 239 | { | 240 | { |
| 240 | struct sock *sk = skb->sk; | 241 | struct sock *sk = skb->sk; |
| 241 | struct rtable *rt = skb_rtable(skb); | 242 | struct rtable *rt = skb_rtable(skb); |
| 242 | struct net_device *dev = rt->u.dst.dev; | 243 | struct net_device *dev = rt->dst.dev; |
| 243 | 244 | ||
| 244 | /* | 245 | /* |
| 245 | * If the indicated interface is up and running, send the packet. | 246 | * If the indicated interface is up and running, send the packet. |
| @@ -271,8 +272,8 @@ int ip_mc_output(struct sk_buff *skb) | |||
| 271 | ) { | 272 | ) { |
| 272 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); | 273 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); |
| 273 | if (newskb) | 274 | if (newskb) |
| 274 | NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, | 275 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, |
| 275 | NULL, newskb->dev, | 276 | newskb, NULL, newskb->dev, |
| 276 | ip_dev_loopback_xmit); | 277 | ip_dev_loopback_xmit); |
| 277 | } | 278 | } |
| 278 | 279 | ||
| @@ -287,12 +288,12 @@ int ip_mc_output(struct sk_buff *skb) | |||
| 287 | if (rt->rt_flags&RTCF_BROADCAST) { | 288 | if (rt->rt_flags&RTCF_BROADCAST) { |
| 288 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); | 289 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); |
| 289 | if (newskb) | 290 | if (newskb) |
| 290 | NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL, | 291 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, |
| 291 | newskb->dev, ip_dev_loopback_xmit); | 292 | NULL, newskb->dev, ip_dev_loopback_xmit); |
| 292 | } | 293 | } |
| 293 | 294 | ||
| 294 | return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev, | 295 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, |
| 295 | ip_finish_output, | 296 | skb->dev, ip_finish_output, |
| 296 | !(IPCB(skb)->flags & IPSKB_REROUTED)); | 297 | !(IPCB(skb)->flags & IPSKB_REROUTED)); |
| 297 | } | 298 | } |
| 298 | 299 | ||
| @@ -305,22 +306,24 @@ int ip_output(struct sk_buff *skb) | |||
| 305 | skb->dev = dev; | 306 | skb->dev = dev; |
| 306 | skb->protocol = htons(ETH_P_IP); | 307 | skb->protocol = htons(ETH_P_IP); |
| 307 | 308 | ||
| 308 | return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev, | 309 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, |
| 309 | ip_finish_output, | 310 | ip_finish_output, |
| 310 | !(IPCB(skb)->flags & IPSKB_REROUTED)); | 311 | !(IPCB(skb)->flags & IPSKB_REROUTED)); |
| 311 | } | 312 | } |
| 312 | 313 | ||
| 313 | int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | 314 | int ip_queue_xmit(struct sk_buff *skb) |
| 314 | { | 315 | { |
| 315 | struct sock *sk = skb->sk; | 316 | struct sock *sk = skb->sk; |
| 316 | struct inet_sock *inet = inet_sk(sk); | 317 | struct inet_sock *inet = inet_sk(sk); |
| 317 | struct ip_options *opt = inet->opt; | 318 | struct ip_options *opt = inet->opt; |
| 318 | struct rtable *rt; | 319 | struct rtable *rt; |
| 319 | struct iphdr *iph; | 320 | struct iphdr *iph; |
| 321 | int res; | ||
| 320 | 322 | ||
| 321 | /* Skip all of this if the packet is already routed, | 323 | /* Skip all of this if the packet is already routed, |
| 322 | * f.e. by something like SCTP. | 324 | * f.e. by something like SCTP. |
| 323 | */ | 325 | */ |
| 326 | rcu_read_lock(); | ||
| 324 | rt = skb_rtable(skb); | 327 | rt = skb_rtable(skb); |
| 325 | if (rt != NULL) | 328 | if (rt != NULL) |
| 326 | goto packet_routed; | 329 | goto packet_routed; |
| @@ -356,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
| 356 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) | 359 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) |
| 357 | goto no_route; | 360 | goto no_route; |
| 358 | } | 361 | } |
| 359 | sk_setup_caps(sk, &rt->u.dst); | 362 | sk_setup_caps(sk, &rt->dst); |
| 360 | } | 363 | } |
| 361 | skb_dst_set(skb, dst_clone(&rt->u.dst)); | 364 | skb_dst_set_noref(skb, &rt->dst); |
| 362 | 365 | ||
| 363 | packet_routed: | 366 | packet_routed: |
| 364 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 367 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
| @@ -369,11 +372,11 @@ packet_routed: | |||
| 369 | skb_reset_network_header(skb); | 372 | skb_reset_network_header(skb); |
| 370 | iph = ip_hdr(skb); | 373 | iph = ip_hdr(skb); |
| 371 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 374 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
| 372 | if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) | 375 | if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) |
| 373 | iph->frag_off = htons(IP_DF); | 376 | iph->frag_off = htons(IP_DF); |
| 374 | else | 377 | else |
| 375 | iph->frag_off = 0; | 378 | iph->frag_off = 0; |
| 376 | iph->ttl = ip_select_ttl(inet, &rt->u.dst); | 379 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
| 377 | iph->protocol = sk->sk_protocol; | 380 | iph->protocol = sk->sk_protocol; |
| 378 | iph->saddr = rt->rt_src; | 381 | iph->saddr = rt->rt_src; |
| 379 | iph->daddr = rt->rt_dst; | 382 | iph->daddr = rt->rt_dst; |
| @@ -384,19 +387,23 @@ packet_routed: | |||
| 384 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); | 387 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); |
| 385 | } | 388 | } |
| 386 | 389 | ||
| 387 | ip_select_ident_more(iph, &rt->u.dst, sk, | 390 | ip_select_ident_more(iph, &rt->dst, sk, |
| 388 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); | 391 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); |
| 389 | 392 | ||
| 390 | skb->priority = sk->sk_priority; | 393 | skb->priority = sk->sk_priority; |
| 391 | skb->mark = sk->sk_mark; | 394 | skb->mark = sk->sk_mark; |
| 392 | 395 | ||
| 393 | return ip_local_out(skb); | 396 | res = ip_local_out(skb); |
| 397 | rcu_read_unlock(); | ||
| 398 | return res; | ||
| 394 | 399 | ||
| 395 | no_route: | 400 | no_route: |
| 401 | rcu_read_unlock(); | ||
| 396 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 402 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
| 397 | kfree_skb(skb); | 403 | kfree_skb(skb); |
| 398 | return -EHOSTUNREACH; | 404 | return -EHOSTUNREACH; |
| 399 | } | 405 | } |
| 406 | EXPORT_SYMBOL(ip_queue_xmit); | ||
| 400 | 407 | ||
| 401 | 408 | ||
| 402 | static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | 409 | static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) |
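Two related things happen in ip_queue_xmit() above. First, the ipfragok parameter disappears: the "may fragment locally" property now travels with the packet as skb->local_df, which the DF test consults, so the caller class that passed ipfragok=1 (the comment names SCTP as the typical case) is expected to set the flag before transmit. Second, the function now runs under rcu_read_lock(), which is what makes the refcount-free skb_dst_set_noref() safe until ip_local_out() returns. A hedged caller-side sketch of the conversion:

	/* before this patch (old signature): */
	err = ip_queue_xmit(skb, 1);		/* 1 == ipfragok */

	/* after this patch: */
	skb->local_df = 1;			/* allow local fragmentation */
	err = ip_queue_xmit(skb);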
| @@ -405,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
| 405 | to->priority = from->priority; | 412 | to->priority = from->priority; |
| 406 | to->protocol = from->protocol; | 413 | to->protocol = from->protocol; |
| 407 | skb_dst_drop(to); | 414 | skb_dst_drop(to); |
| 408 | skb_dst_set(to, dst_clone(skb_dst(from))); | 415 | skb_dst_copy(to, from); |
| 409 | to->dev = from->dev; | 416 | to->dev = from->dev; |
| 410 | to->mark = from->mark; | 417 | to->mark = from->mark; |
| 411 | 418 | ||
| @@ -436,17 +443,16 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
| 436 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | 443 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) |
| 437 | { | 444 | { |
| 438 | struct iphdr *iph; | 445 | struct iphdr *iph; |
| 439 | int raw = 0; | ||
| 440 | int ptr; | 446 | int ptr; |
| 441 | struct net_device *dev; | 447 | struct net_device *dev; |
| 442 | struct sk_buff *skb2; | 448 | struct sk_buff *skb2; |
| 443 | unsigned int mtu, hlen, left, len, ll_rs, pad; | 449 | unsigned int mtu, hlen, left, len, ll_rs; |
| 444 | int offset; | 450 | int offset; |
| 445 | __be16 not_last_frag; | 451 | __be16 not_last_frag; |
| 446 | struct rtable *rt = skb_rtable(skb); | 452 | struct rtable *rt = skb_rtable(skb); |
| 447 | int err = 0; | 453 | int err = 0; |
| 448 | 454 | ||
| 449 | dev = rt->u.dst.dev; | 455 | dev = rt->dst.dev; |
| 450 | 456 | ||
| 451 | /* | 457 | /* |
| 452 | * Point into the IP datagram header. | 458 | * Point into the IP datagram header. |
| @@ -467,7 +473,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 467 | */ | 473 | */ |
| 468 | 474 | ||
| 469 | hlen = iph->ihl * 4; | 475 | hlen = iph->ihl * 4; |
| 470 | mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ | 476 | mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ |
| 477 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
| 478 | if (skb->nf_bridge) | ||
| 479 | mtu -= nf_bridge_mtu_reduction(skb); | ||
| 480 | #endif | ||
| 471 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; | 481 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; |
| 472 | 482 | ||
| 473 | /* When frag_list is given, use it. First, check its validity: | 483 | /* When frag_list is given, use it. First, check its validity: |
| @@ -478,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 478 | * we can switch to copy when we see the first bad fragment. | 488 | * we can switch to copy when we see the first bad fragment. |
| 479 | */ | 489 | */ |
| 480 | if (skb_has_frags(skb)) { | 490 | if (skb_has_frags(skb)) { |
| 481 | struct sk_buff *frag; | 491 | struct sk_buff *frag, *frag2; |
| 482 | int first_len = skb_pagelen(skb); | 492 | int first_len = skb_pagelen(skb); |
| 483 | int truesizes = 0; | ||
| 484 | 493 | ||
| 485 | if (first_len - hlen > mtu || | 494 | if (first_len - hlen > mtu || |
| 486 | ((first_len - hlen) & 7) || | 495 | ((first_len - hlen) & 7) || |
| @@ -493,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 493 | if (frag->len > mtu || | 502 | if (frag->len > mtu || |
| 494 | ((frag->len & 7) && frag->next) || | 503 | ((frag->len & 7) && frag->next) || |
| 495 | skb_headroom(frag) < hlen) | 504 | skb_headroom(frag) < hlen) |
| 496 | goto slow_path; | 505 | goto slow_path_clean; |
| 497 | 506 | ||
| 498 | /* Partially cloned skb? */ | 507 | /* Partially cloned skb? */ |
| 499 | if (skb_shared(frag)) | 508 | if (skb_shared(frag)) |
| 500 | goto slow_path; | 509 | goto slow_path_clean; |
| 501 | 510 | ||
| 502 | BUG_ON(frag->sk); | 511 | BUG_ON(frag->sk); |
| 503 | if (skb->sk) { | 512 | if (skb->sk) { |
| 504 | frag->sk = skb->sk; | 513 | frag->sk = skb->sk; |
| 505 | frag->destructor = sock_wfree; | 514 | frag->destructor = sock_wfree; |
| 506 | } | 515 | } |
| 507 | truesizes += frag->truesize; | 516 | skb->truesize -= frag->truesize; |
| 508 | } | 517 | } |
| 509 | 518 | ||
| 510 | /* Everything is OK. Generate! */ | 519 | /* Everything is OK. Generate! */ |
| @@ -514,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 514 | frag = skb_shinfo(skb)->frag_list; | 523 | frag = skb_shinfo(skb)->frag_list; |
| 515 | skb_frag_list_init(skb); | 524 | skb_frag_list_init(skb); |
| 516 | skb->data_len = first_len - skb_headlen(skb); | 525 | skb->data_len = first_len - skb_headlen(skb); |
| 517 | skb->truesize -= truesizes; | ||
| 518 | skb->len = first_len; | 526 | skb->len = first_len; |
| 519 | iph->tot_len = htons(first_len); | 527 | iph->tot_len = htons(first_len); |
| 520 | iph->frag_off = htons(IP_MF); | 528 | iph->frag_off = htons(IP_MF); |
| @@ -566,18 +574,25 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 566 | } | 574 | } |
| 567 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 575 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
| 568 | return err; | 576 | return err; |
| 577 | |||
| 578 | slow_path_clean: | ||
| 579 | skb_walk_frags(skb, frag2) { | ||
| 580 | if (frag2 == frag) | ||
| 581 | break; | ||
| 582 | frag2->sk = NULL; | ||
| 583 | frag2->destructor = NULL; | ||
| 584 | skb->truesize += frag2->truesize; | ||
| 585 | } | ||
| 569 | } | 586 | } |
| 570 | 587 | ||
| 571 | slow_path: | 588 | slow_path: |
| 572 | left = skb->len - hlen; /* Space per frame */ | 589 | left = skb->len - hlen; /* Space per frame */ |
| 573 | ptr = raw + hlen; /* Where to start from */ | 590 | ptr = hlen; /* Where to start from */ |
| 574 | 591 | ||
| 575 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, | 592 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, |
| 576 | * we need to make room for the encapsulating header | 593 | * we need to make room for the encapsulating header |
| 577 | */ | 594 | */ |
| 578 | pad = nf_bridge_pad(skb); | 595 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); |
| 579 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); | ||
| 580 | mtu -= pad; | ||
| 581 | 596 | ||
| 582 | /* | 597 | /* |
| 583 | * Fragment the datagram. | 598 | * Fragment the datagram. |
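slow_path_clean exists because the validation loop at the top of ip_fragment() now mutates state as it scans: each accepted fragment is assigned an owner (frag->sk, frag->destructor) and its truesize is moved off the head skb immediately, instead of being accumulated in the removed truesizes variable. If a later fragment then fails a check, those side effects must be undone for exactly the fragments already visited before falling back to the copying slow path. The undo-partial-work shape in miniature, with frag_ok() as a hypothetical predicate:

	struct sk_buff *frag, *frag2;

	skb_walk_frags(skb, frag) {
		if (!frag_ok(frag))			/* hypothetical */
			goto slow_path_clean;
		frag->sk = skb->sk;			/* side effect 1 */
		frag->destructor = sock_wfree;
		skb->truesize -= frag->truesize;	/* side effect 2 */
	}
	/* ...fast path... */

	slow_path_clean:
	skb_walk_frags(skb, frag2) {
		if (frag2 == frag)	/* stop at the first untouched one */
			break;
		frag2->sk = NULL;			/* undo 1 */
		frag2->destructor = NULL;
		skb->truesize += frag2->truesize;	/* undo 2 */
	}
	/* fall through to the slow path */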
| @@ -687,7 +702,6 @@ fail: | |||
| 687 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 702 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
| 688 | return err; | 703 | return err; |
| 689 | } | 704 | } |
| 690 | |||
| 691 | EXPORT_SYMBOL(ip_fragment); | 705 | EXPORT_SYMBOL(ip_fragment); |
| 692 | 706 | ||
| 693 | int | 707 | int |
| @@ -706,6 +720,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk | |||
| 706 | } | 720 | } |
| 707 | return 0; | 721 | return 0; |
| 708 | } | 722 | } |
| 723 | EXPORT_SYMBOL(ip_generic_getfrag); | ||
| 709 | 724 | ||
| 710 | static inline __wsum | 725 | static inline __wsum |
| 711 | csum_page(struct page *page, int offset, int copy) | 726 | csum_page(struct page *page, int offset, int copy) |
| @@ -823,13 +838,13 @@ int ip_append_data(struct sock *sk, | |||
| 823 | */ | 838 | */ |
| 824 | *rtp = NULL; | 839 | *rtp = NULL; |
| 825 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | 840 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
| 826 | rt->u.dst.dev->mtu : | 841 | rt->dst.dev->mtu : |
| 827 | dst_mtu(rt->u.dst.path); | 842 | dst_mtu(rt->dst.path); |
| 828 | inet->cork.dst = &rt->u.dst; | 843 | inet->cork.dst = &rt->dst; |
| 829 | inet->cork.length = 0; | 844 | inet->cork.length = 0; |
| 830 | sk->sk_sndmsg_page = NULL; | 845 | sk->sk_sndmsg_page = NULL; |
| 831 | sk->sk_sndmsg_off = 0; | 846 | sk->sk_sndmsg_off = 0; |
| 832 | if ((exthdrlen = rt->u.dst.header_len) != 0) { | 847 | if ((exthdrlen = rt->dst.header_len) != 0) { |
| 833 | length += exthdrlen; | 848 | length += exthdrlen; |
| 834 | transhdrlen += exthdrlen; | 849 | transhdrlen += exthdrlen; |
| 835 | } | 850 | } |
| @@ -842,7 +857,7 @@ int ip_append_data(struct sock *sk, | |||
| 842 | exthdrlen = 0; | 857 | exthdrlen = 0; |
| 843 | mtu = inet->cork.fragsize; | 858 | mtu = inet->cork.fragsize; |
| 844 | } | 859 | } |
| 845 | hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); | 860 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
| 846 | 861 | ||
| 847 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 862 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
| 848 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 863 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
| @@ -859,14 +874,16 @@ int ip_append_data(struct sock *sk, | |||
| 859 | */ | 874 | */ |
| 860 | if (transhdrlen && | 875 | if (transhdrlen && |
| 861 | length + fragheaderlen <= mtu && | 876 | length + fragheaderlen <= mtu && |
| 862 | rt->u.dst.dev->features & NETIF_F_V4_CSUM && | 877 | rt->dst.dev->features & NETIF_F_V4_CSUM && |
| 863 | !exthdrlen) | 878 | !exthdrlen) |
| 864 | csummode = CHECKSUM_PARTIAL; | 879 | csummode = CHECKSUM_PARTIAL; |
| 865 | 880 | ||
| 881 | skb = skb_peek_tail(&sk->sk_write_queue); | ||
| 882 | |||
| 866 | inet->cork.length += length; | 883 | inet->cork.length += length; |
| 867 | if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && | 884 | if (((length > mtu) || (skb && skb_is_gso(skb))) && |
| 868 | (sk->sk_protocol == IPPROTO_UDP) && | 885 | (sk->sk_protocol == IPPROTO_UDP) && |
| 869 | (rt->u.dst.dev->features & NETIF_F_UFO)) { | 886 | (rt->dst.dev->features & NETIF_F_UFO)) { |
| 870 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, | 887 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, |
| 871 | fragheaderlen, transhdrlen, mtu, | 888 | fragheaderlen, transhdrlen, mtu, |
| 872 | flags); | 889 | flags); |
| @@ -882,7 +899,7 @@ int ip_append_data(struct sock *sk, | |||
| 882 | * adding appropriate IP header. | 899 | * adding appropriate IP header. |
| 883 | */ | 900 | */ |
| 884 | 901 | ||
| 885 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) | 902 | if (!skb) |
| 886 | goto alloc_new_skb; | 903 | goto alloc_new_skb; |
| 887 | 904 | ||
| 888 | while (length > 0) { | 905 | while (length > 0) { |
| @@ -914,7 +931,7 @@ alloc_new_skb: | |||
| 914 | fraglen = datalen + fragheaderlen; | 931 | fraglen = datalen + fragheaderlen; |
| 915 | 932 | ||
| 916 | if ((flags & MSG_MORE) && | 933 | if ((flags & MSG_MORE) && |
| 917 | !(rt->u.dst.dev->features&NETIF_F_SG)) | 934 | !(rt->dst.dev->features&NETIF_F_SG)) |
| 918 | alloclen = mtu; | 935 | alloclen = mtu; |
| 919 | else | 936 | else |
| 920 | alloclen = datalen + fragheaderlen; | 937 | alloclen = datalen + fragheaderlen; |
| @@ -925,7 +942,7 @@ alloc_new_skb: | |||
| 925 | * the last. | 942 | * the last. |
| 926 | */ | 943 | */ |
| 927 | if (datalen == length + fraggap) | 944 | if (datalen == length + fraggap) |
| 928 | alloclen += rt->u.dst.trailer_len; | 945 | alloclen += rt->dst.trailer_len; |
| 929 | 946 | ||
| 930 | if (transhdrlen) { | 947 | if (transhdrlen) { |
| 931 | skb = sock_alloc_send_skb(sk, | 948 | skb = sock_alloc_send_skb(sk, |
| @@ -998,7 +1015,7 @@ alloc_new_skb: | |||
| 998 | if (copy > length) | 1015 | if (copy > length) |
| 999 | copy = length; | 1016 | copy = length; |
| 1000 | 1017 | ||
| 1001 | if (!(rt->u.dst.dev->features&NETIF_F_SG)) { | 1018 | if (!(rt->dst.dev->features&NETIF_F_SG)) { |
| 1002 | unsigned int off; | 1019 | unsigned int off; |
| 1003 | 1020 | ||
| 1004 | off = skb->len; | 1021 | off = skb->len; |
| @@ -1093,10 +1110,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
| 1093 | if (inet->cork.flags & IPCORK_OPT) | 1110 | if (inet->cork.flags & IPCORK_OPT) |
| 1094 | opt = inet->cork.opt; | 1111 | opt = inet->cork.opt; |
| 1095 | 1112 | ||
| 1096 | if (!(rt->u.dst.dev->features&NETIF_F_SG)) | 1113 | if (!(rt->dst.dev->features&NETIF_F_SG)) |
| 1097 | return -EOPNOTSUPP; | 1114 | return -EOPNOTSUPP; |
| 1098 | 1115 | ||
| 1099 | hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); | 1116 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
| 1100 | mtu = inet->cork.fragsize; | 1117 | mtu = inet->cork.fragsize; |
| 1101 | 1118 | ||
| 1102 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 1119 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
| @@ -1111,8 +1128,9 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
| 1111 | return -EINVAL; | 1128 | return -EINVAL; |
| 1112 | 1129 | ||
| 1113 | inet->cork.length += size; | 1130 | inet->cork.length += size; |
| 1114 | if ((sk->sk_protocol == IPPROTO_UDP) && | 1131 | if ((size + skb->len > mtu) && |
| 1115 | (rt->u.dst.dev->features & NETIF_F_UFO)) { | 1132 | (sk->sk_protocol == IPPROTO_UDP) && |
| 1133 | (rt->dst.dev->features & NETIF_F_UFO)) { | ||
| 1116 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 1134 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; |
| 1117 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 1135 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
| 1118 | } | 1136 | } |
| @@ -1264,8 +1282,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1264 | * If local_df is set too, we still allow to fragment this frame | 1282 | * If local_df is set too, we still allow to fragment this frame |
| 1265 | * locally. */ | 1283 | * locally. */ |
| 1266 | if (inet->pmtudisc >= IP_PMTUDISC_DO || | 1284 | if (inet->pmtudisc >= IP_PMTUDISC_DO || |
| 1267 | (skb->len <= dst_mtu(&rt->u.dst) && | 1285 | (skb->len <= dst_mtu(&rt->dst) && |
| 1268 | ip_dont_fragment(sk, &rt->u.dst))) | 1286 | ip_dont_fragment(sk, &rt->dst))) |
| 1269 | df = htons(IP_DF); | 1287 | df = htons(IP_DF); |
| 1270 | 1288 | ||
| 1271 | if (inet->cork.flags & IPCORK_OPT) | 1289 | if (inet->cork.flags & IPCORK_OPT) |
| @@ -1274,7 +1292,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1274 | if (rt->rt_type == RTN_MULTICAST) | 1292 | if (rt->rt_type == RTN_MULTICAST) |
| 1275 | ttl = inet->mc_ttl; | 1293 | ttl = inet->mc_ttl; |
| 1276 | else | 1294 | else |
| 1277 | ttl = ip_select_ttl(inet, &rt->u.dst); | 1295 | ttl = ip_select_ttl(inet, &rt->dst); |
| 1278 | 1296 | ||
| 1279 | iph = (struct iphdr *)skb->data; | 1297 | iph = (struct iphdr *)skb->data; |
| 1280 | iph->version = 4; | 1298 | iph->version = 4; |
| @@ -1285,7 +1303,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1285 | } | 1303 | } |
| 1286 | iph->tos = inet->tos; | 1304 | iph->tos = inet->tos; |
| 1287 | iph->frag_off = df; | 1305 | iph->frag_off = df; |
| 1288 | ip_select_ident(iph, &rt->u.dst, sk); | 1306 | ip_select_ident(iph, &rt->dst, sk); |
| 1289 | iph->ttl = ttl; | 1307 | iph->ttl = ttl; |
| 1290 | iph->protocol = sk->sk_protocol; | 1308 | iph->protocol = sk->sk_protocol; |
| 1291 | iph->saddr = rt->rt_src; | 1309 | iph->saddr = rt->rt_src; |
| @@ -1298,7 +1316,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1298 | * on dst refcount | 1316 | * on dst refcount |
| 1299 | */ | 1317 | */ |
| 1300 | inet->cork.dst = NULL; | 1318 | inet->cork.dst = NULL; |
| 1301 | skb_dst_set(skb, &rt->u.dst); | 1319 | skb_dst_set(skb, &rt->dst); |
| 1302 | 1320 | ||
| 1303 | if (iph->protocol == IPPROTO_ICMP) | 1321 | if (iph->protocol == IPPROTO_ICMP) |
| 1304 | icmp_out_count(net, ((struct icmphdr *) | 1322 | icmp_out_count(net, ((struct icmphdr *) |
| @@ -1435,7 +1453,3 @@ void __init ip_init(void) | |||
| 1435 | igmp_mc_proc_init(); | 1453 | igmp_mc_proc_init(); |
| 1436 | #endif | 1454 | #endif |
| 1437 | } | 1455 | } |
| 1438 | |||
| 1439 | EXPORT_SYMBOL(ip_generic_getfrag); | ||
| 1440 | EXPORT_SYMBOL(ip_queue_xmit); | ||
| 1441 | EXPORT_SYMBOL(ip_send_check); | ||
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cafad9baff03..64b70ad162e3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/icmp.h> | 23 | #include <linux/icmp.h> |
| 24 | #include <linux/inetdevice.h> | 24 | #include <linux/inetdevice.h> |
| 25 | #include <linux/netdevice.h> | 25 | #include <linux/netdevice.h> |
| 26 | #include <linux/slab.h> | ||
| 26 | #include <net/sock.h> | 27 | #include <net/sock.h> |
| 27 | #include <net/ip.h> | 28 | #include <net/ip.h> |
| 28 | #include <net/icmp.h> | 29 | #include <net/icmp.h> |
| @@ -238,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) | |||
| 238 | sent to multicast group to reach destination designated router. | 239 | sent to multicast group to reach destination designated router. |
| 239 | */ | 240 | */ |
| 240 | struct ip_ra_chain *ip_ra_chain; | 241 | struct ip_ra_chain *ip_ra_chain; |
| 241 | DEFINE_RWLOCK(ip_ra_lock); | 242 | static DEFINE_SPINLOCK(ip_ra_lock); |
| 243 | |||
| 244 | |||
| 245 | static void ip_ra_destroy_rcu(struct rcu_head *head) | ||
| 246 | { | ||
| 247 | struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); | ||
| 248 | |||
| 249 | sock_put(ra->saved_sk); | ||
| 250 | kfree(ra); | ||
| 251 | } | ||
| 242 | 252 | ||
| 243 | int ip_ra_control(struct sock *sk, unsigned char on, | 253 | int ip_ra_control(struct sock *sk, unsigned char on, |
| 244 | void (*destructor)(struct sock *)) | 254 | void (*destructor)(struct sock *)) |
| @@ -250,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on, | |||
| 250 | 260 | ||
| 251 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; | 261 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; |
| 252 | 262 | ||
| 253 | write_lock_bh(&ip_ra_lock); | 263 | spin_lock_bh(&ip_ra_lock); |
| 254 | for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { | 264 | for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { |
| 255 | if (ra->sk == sk) { | 265 | if (ra->sk == sk) { |
| 256 | if (on) { | 266 | if (on) { |
| 257 | write_unlock_bh(&ip_ra_lock); | 267 | spin_unlock_bh(&ip_ra_lock); |
| 258 | kfree(new_ra); | 268 | kfree(new_ra); |
| 259 | return -EADDRINUSE; | 269 | return -EADDRINUSE; |
| 260 | } | 270 | } |
| 261 | *rap = ra->next; | 271 | /* don't let ip_call_ra_chain() use sk again */ |
| 262 | write_unlock_bh(&ip_ra_lock); | 272 | ra->sk = NULL; |
| 273 | rcu_assign_pointer(*rap, ra->next); | ||
| 274 | spin_unlock_bh(&ip_ra_lock); | ||
| 263 | 275 | ||
| 264 | if (ra->destructor) | 276 | if (ra->destructor) |
| 265 | ra->destructor(sk); | 277 | ra->destructor(sk); |
| 266 | sock_put(sk); | 278 | /* |
| 267 | kfree(ra); | 279 | * Delay sock_put(sk) and kfree(ra) until one RCU grace |
| 280 | * period has elapsed, so ip_call_ra_chain() doesn't need | ||
| 281 | * to mess with socket refcounts. | ||
| 282 | */ | ||
| 283 | ra->saved_sk = sk; | ||
| 284 | call_rcu(&ra->rcu, ip_ra_destroy_rcu); | ||
| 268 | return 0; | 285 | return 0; |
| 269 | } | 286 | } |
| 270 | } | 287 | } |
| 271 | if (new_ra == NULL) { | 288 | if (new_ra == NULL) { |
| 272 | write_unlock_bh(&ip_ra_lock); | 289 | spin_unlock_bh(&ip_ra_lock); |
| 273 | return -ENOBUFS; | 290 | return -ENOBUFS; |
| 274 | } | 291 | } |
| 275 | new_ra->sk = sk; | 292 | new_ra->sk = sk; |
| 276 | new_ra->destructor = destructor; | 293 | new_ra->destructor = destructor; |
| 277 | 294 | ||
| 278 | new_ra->next = ra; | 295 | new_ra->next = ra; |
| 279 | *rap = new_ra; | 296 | rcu_assign_pointer(*rap, new_ra); |
| 280 | sock_hold(sk); | 297 | sock_hold(sk); |
| 281 | write_unlock_bh(&ip_ra_lock); | 298 | spin_unlock_bh(&ip_ra_lock); |
| 282 | 299 | ||
| 283 | return 0; | 300 | return 0; |
| 284 | } | 301 | } |
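This is the writer half of the RCU conversion seen in ip_input.c: the new spinlock only serializes updaters, and three steps protect concurrent readers — ra->sk is cleared so an in-flight ip_call_ra_chain() that still sees the entry skips it, rcu_assign_pointer() unpublishes the node, and call_rcu() defers the sock_put()/kfree() until every reader that could hold the old pointer has finished. The canonical unlink-then-defer-free shape, sketched generically:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct node {
		struct node __rcu	*next;
		struct rcu_head		rcu;
	};

	static struct node __rcu *head;
	static DEFINE_SPINLOCK(list_lock);	/* writers only */

	static void node_free_rcu(struct rcu_head *h)
	{
		kfree(container_of(h, struct node, rcu));
	}

	static void unlink_first(void)
	{
		struct node *n;

		spin_lock_bh(&list_lock);
		n = rcu_dereference_protected(head,
					      lockdep_is_held(&list_lock));
		if (n) {
			rcu_assign_pointer(head, n->next);   /* unpublish */
			call_rcu(&n->rcu, node_free_rcu);    /* free after GP */
		}
		spin_unlock_bh(&list_lock);
	}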
| @@ -286,12 +303,8 @@ int ip_ra_control(struct sock *sk, unsigned char on, | |||
| 286 | void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, | 303 | void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, |
| 287 | __be16 port, u32 info, u8 *payload) | 304 | __be16 port, u32 info, u8 *payload) |
| 288 | { | 305 | { |
| 289 | struct inet_sock *inet = inet_sk(sk); | ||
| 290 | struct sock_exterr_skb *serr; | 306 | struct sock_exterr_skb *serr; |
| 291 | 307 | ||
| 292 | if (!inet->recverr) | ||
| 293 | return; | ||
| 294 | |||
| 295 | skb = skb_clone(skb, GFP_ATOMIC); | 308 | skb = skb_clone(skb, GFP_ATOMIC); |
| 296 | if (!skb) | 309 | if (!skb) |
| 297 | return; | 310 | return; |
| @@ -451,7 +464,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
| 451 | (1<<IP_TTL) | (1<<IP_HDRINCL) | | 464 | (1<<IP_TTL) | (1<<IP_HDRINCL) | |
| 452 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | | 465 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | |
| 453 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | | 466 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | |
| 454 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || | 467 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | |
| 468 | (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || | ||
| 455 | optname == IP_MULTICAST_TTL || | 469 | optname == IP_MULTICAST_TTL || |
| 456 | optname == IP_MULTICAST_ALL || | 470 | optname == IP_MULTICAST_ALL || |
| 457 | optname == IP_MULTICAST_LOOP || | 471 | optname == IP_MULTICAST_LOOP || |
| @@ -574,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
| 574 | } | 588 | } |
| 575 | inet->hdrincl = val ? 1 : 0; | 589 | inet->hdrincl = val ? 1 : 0; |
| 576 | break; | 590 | break; |
| 591 | case IP_NODEFRAG: | ||
| 592 | if (sk->sk_type != SOCK_RAW) { | ||
| 593 | err = -ENOPROTOOPT; | ||
| 594 | break; | ||
| 595 | } | ||
| 596 | inet->nodefrag = val ? 1 : 0; | ||
| 597 | break; | ||
| 577 | case IP_MTU_DISCOVER: | 598 | case IP_MTU_DISCOVER: |
| 578 | if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) | 599 | if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) |
| 579 | goto e_inval; | 600 | goto e_inval; |
| @@ -936,6 +957,14 @@ mc_msf_out: | |||
| 936 | inet->transparent = !!val; | 957 | inet->transparent = !!val; |
| 937 | break; | 958 | break; |
| 938 | 959 | ||
| 960 | case IP_MINTTL: | ||
| 961 | if (optlen < 1) | ||
| 962 | goto e_inval; | ||
| 963 | if (val < 0 || val > 255) | ||
| 964 | goto e_inval; | ||
| 965 | inet->min_ttl = val; | ||
| 966 | break; | ||
| 967 | |||
| 939 | default: | 968 | default: |
| 940 | err = -ENOPROTOOPT; | 969 | err = -ENOPROTOOPT; |
| 941 | break; | 970 | break; |
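IP_MINTTL gives user space the Generalized TTL Security Mechanism (RFC 5082): the socket drops inbound packets whose IP TTL is below the configured floor, which cheaply rejects spoofed traffic that had to cross at least one router; values are clamped to 0-255 by the hunk above, and IP_NODEFRAG (raw sockets only) arrives in the same series. A user-space sketch of enabling it; the fallback define assumes the Linux value from <linux/in.h>:

	#include <netinet/in.h>
	#include <stdio.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#ifndef IP_MINTTL
	#define IP_MINTTL 21		/* value from <linux/in.h> */
	#endif

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		int minttl = 255;	/* only directly connected peers pass */

		if (fd < 0 || setsockopt(fd, IPPROTO_IP, IP_MINTTL,
					 &minttl, sizeof(minttl)) < 0) {
			perror("IP_MINTTL");
			return 1;
		}
		close(fd);
		return 0;
	}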
| @@ -948,6 +977,22 @@ e_inval: | |||
| 948 | return -EINVAL; | 977 | return -EINVAL; |
| 949 | } | 978 | } |
| 950 | 979 | ||
| 980 | /** | ||
| 981 | * ip_queue_rcv_skb - Queue an skb into sock receive queue | ||
| 982 | * @sk: socket | ||
| 983 | * @skb: buffer | ||
| 984 | * | ||
| 985 | * Queues an skb into the socket receive queue. If the IP_CMSG_PKTINFO | ||
| 986 | * option is not set, we drop the skb dst entry now, while its cache line is hot. | ||
| 987 | */ | ||
| 988 | int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | ||
| 989 | { | ||
| 990 | if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) | ||
| 991 | skb_dst_drop(skb); | ||
| 992 | return sock_queue_rcv_skb(sk, skb); | ||
| 993 | } | ||
| 994 | EXPORT_SYMBOL(ip_queue_rcv_skb); | ||
| 995 | |||
| 951 | int ip_setsockopt(struct sock *sk, int level, | 996 | int ip_setsockopt(struct sock *sk, int level, |
| 952 | int optname, char __user *optval, unsigned int optlen) | 997 | int optname, char __user *optval, unsigned int optlen) |
| 953 | { | 998 | { |
| @@ -1084,6 +1129,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
| 1084 | case IP_HDRINCL: | 1129 | case IP_HDRINCL: |
| 1085 | val = inet->hdrincl; | 1130 | val = inet->hdrincl; |
| 1086 | break; | 1131 | break; |
| 1132 | case IP_NODEFRAG: | ||
| 1133 | val = inet->nodefrag; | ||
| 1134 | break; | ||
| 1087 | case IP_MTU_DISCOVER: | 1135 | case IP_MTU_DISCOVER: |
| 1088 | val = inet->pmtudisc; | 1136 | val = inet->pmtudisc; |
| 1089 | break; | 1137 | break; |
| @@ -1198,6 +1246,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
| 1198 | case IP_TRANSPARENT: | 1246 | case IP_TRANSPARENT: |
| 1199 | val = inet->transparent; | 1247 | val = inet->transparent; |
| 1200 | break; | 1248 | break; |
| 1249 | case IP_MINTTL: | ||
| 1250 | val = inet->min_ttl; | ||
| 1251 | break; | ||
| 1201 | default: | 1252 | default: |
| 1202 | release_sock(sk); | 1253 | release_sock(sk); |
| 1203 | return -ENOPROTOOPT; | 1254 | return -ENOPROTOOPT; |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150ae..629067571f02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | static void ipcomp4_err(struct sk_buff *skb, u32 info) | 26 | static void ipcomp4_err(struct sk_buff *skb, u32 info) |
| 27 | { | 27 | { |
| 28 | struct net *net = dev_net(skb->dev); | ||
| 28 | __be32 spi; | 29 | __be32 spi; |
| 29 | struct iphdr *iph = (struct iphdr *)skb->data; | 30 | struct iphdr *iph = (struct iphdr *)skb->data; |
| 30 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); | 31 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); |
| @@ -35,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
| 35 | return; | 36 | return; |
| 36 | 37 | ||
| 37 | spi = htonl(ntohs(ipch->cpi)); | 38 | spi = htonl(ntohs(ipch->cpi)); |
| 38 | x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, | 39 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, |
| 39 | spi, IPPROTO_COMP, AF_INET); | 40 | spi, IPPROTO_COMP, AF_INET); |
| 40 | if (!x) | 41 | if (!x) |
| 41 | return; | 42 | return; |
| @@ -47,9 +48,10 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
| 47 | /* We always hold one tunnel user reference to indicate a tunnel */ | 48 | /* We always hold one tunnel user reference to indicate a tunnel */ |
| 48 | static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | 49 | static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) |
| 49 | { | 50 | { |
| 51 | struct net *net = xs_net(x); | ||
| 50 | struct xfrm_state *t; | 52 | struct xfrm_state *t; |
| 51 | 53 | ||
| 52 | t = xfrm_state_alloc(&init_net); | 54 | t = xfrm_state_alloc(net); |
| 53 | if (t == NULL) | 55 | if (t == NULL) |
| 54 | goto out; | 56 | goto out; |
| 55 | 57 | ||
| @@ -61,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | |||
| 61 | t->props.mode = x->props.mode; | 63 | t->props.mode = x->props.mode; |
| 62 | t->props.saddr.a4 = x->props.saddr.a4; | 64 | t->props.saddr.a4 = x->props.saddr.a4; |
| 63 | t->props.flags = x->props.flags; | 65 | t->props.flags = x->props.flags; |
| 66 | memcpy(&t->mark, &x->mark, sizeof(t->mark)); | ||
| 64 | 67 | ||
| 65 | if (xfrm_init_state(t)) | 68 | if (xfrm_init_state(t)) |
| 66 | goto error; | 69 | goto error; |
| @@ -82,10 +85,12 @@ error: | |||
| 82 | */ | 85 | */ |
| 83 | static int ipcomp_tunnel_attach(struct xfrm_state *x) | 86 | static int ipcomp_tunnel_attach(struct xfrm_state *x) |
| 84 | { | 87 | { |
| 88 | struct net *net = xs_net(x); | ||
| 85 | int err = 0; | 89 | int err = 0; |
| 86 | struct xfrm_state *t; | 90 | struct xfrm_state *t; |
| 91 | u32 mark = x->mark.v & x->mark.m; | ||
| 87 | 92 | ||
| 88 | t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, | 93 | t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, |
| 89 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); | 94 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); |
| 90 | if (!t) { | 95 | if (!t) { |
| 91 | t = ipcomp_tunnel_create(x); | 96 | t = ipcomp_tunnel_create(x); |
| @@ -124,16 +129,12 @@ static int ipcomp4_init_state(struct xfrm_state *x) | |||
| 124 | if (x->props.mode == XFRM_MODE_TUNNEL) { | 129 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
| 125 | err = ipcomp_tunnel_attach(x); | 130 | err = ipcomp_tunnel_attach(x); |
| 126 | if (err) | 131 | if (err) |
| 127 | goto error_tunnel; | 132 | goto out; |
| 128 | } | 133 | } |
| 129 | 134 | ||
| 130 | err = 0; | 135 | err = 0; |
| 131 | out: | 136 | out: |
| 132 | return err; | 137 | return err; |
| 133 | |||
| 134 | error_tunnel: | ||
| 135 | ipcomp_destroy(x); | ||
| 136 | goto out; | ||
| 137 | } | 138 | } |
| 138 | 139 | ||
| 139 | static const struct xfrm_type ipcomp_type = { | 140 | static const struct xfrm_type ipcomp_type = { |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 10a6a604bf32..3a6e1ec5e9ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
| @@ -53,6 +53,7 @@ | |||
| 53 | #include <linux/root_dev.h> | 53 | #include <linux/root_dev.h> |
| 54 | #include <linux/delay.h> | 54 | #include <linux/delay.h> |
| 55 | #include <linux/nfs_fs.h> | 55 | #include <linux/nfs_fs.h> |
| 56 | #include <linux/slab.h> | ||
| 56 | #include <net/net_namespace.h> | 57 | #include <net/net_namespace.h> |
| 57 | #include <net/arp.h> | 58 | #include <net/arp.h> |
| 58 | #include <net/ip.h> | 59 | #include <net/ip.h> |
| @@ -187,6 +188,16 @@ struct ic_device { | |||
| 187 | static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ | 188 | static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ |
| 188 | static struct net_device *ic_dev __initdata = NULL; /* Selected device */ | 189 | static struct net_device *ic_dev __initdata = NULL; /* Selected device */ |
| 189 | 190 | ||
| 191 | static bool __init ic_device_match(struct net_device *dev) | ||
| 192 | { | ||
| 193 | if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : | ||
| 194 | (!(dev->flags & IFF_LOOPBACK) && | ||
| 195 | (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && | ||
| 196 | strncmp(dev->name, "dummy", 5))) | ||
| 197 | return true; | ||
| 198 | return false; | ||
| 199 | } | ||
| 200 | |||
| 190 | static int __init ic_open_devs(void) | 201 | static int __init ic_open_devs(void) |
| 191 | { | 202 | { |
| 192 | struct ic_device *d, **last; | 203 | struct ic_device *d, **last; |
| @@ -207,10 +218,7 @@ static int __init ic_open_devs(void) | |||
| 207 | for_each_netdev(&init_net, dev) { | 218 | for_each_netdev(&init_net, dev) { |
| 208 | if (dev->flags & IFF_LOOPBACK) | 219 | if (dev->flags & IFF_LOOPBACK) |
| 209 | continue; | 220 | continue; |
| 210 | if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : | 221 | if (ic_device_match(dev)) { |
| 211 | (!(dev->flags & IFF_LOOPBACK) && | ||
| 212 | (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && | ||
| 213 | strncmp(dev->name, "dummy", 5))) { | ||
| 214 | int able = 0; | 222 | int able = 0; |
| 215 | if (dev->mtu >= 364) | 223 | if (dev->mtu >= 364) |
| 216 | able |= IC_BOOTP; | 224 | able |= IC_BOOTP; |
| @@ -228,7 +236,7 @@ static int __init ic_open_devs(void) | |||
| 228 | } | 236 | } |
| 229 | if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { | 237 | if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { |
| 230 | rtnl_unlock(); | 238 | rtnl_unlock(); |
| 231 | return -1; | 239 | return -ENOMEM; |
| 232 | } | 240 | } |
| 233 | d->dev = dev; | 241 | d->dev = dev; |
| 234 | *last = d; | 242 | *last = d; |
| @@ -253,7 +261,7 @@ static int __init ic_open_devs(void) | |||
| 253 | printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); | 261 | printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); |
| 254 | else | 262 | else |
| 255 | printk(KERN_ERR "IP-Config: No network devices available.\n"); | 263 | printk(KERN_ERR "IP-Config: No network devices available.\n"); |
| 256 | return -1; | 264 | return -ENODEV; |
| 257 | } | 265 | } |
| 258 | return 0; | 266 | return 0; |
| 259 | } | 267 | } |
| @@ -657,6 +665,13 @@ ic_dhcp_init_options(u8 *options) | |||
| 657 | memcpy(e, ic_req_params, sizeof(ic_req_params)); | 665 | memcpy(e, ic_req_params, sizeof(ic_req_params)); |
| 658 | e += sizeof(ic_req_params); | 666 | e += sizeof(ic_req_params); |
| 659 | 667 | ||
| 668 | if (ic_host_name_set) { | ||
| 669 | *e++ = 12; /* host-name */ | ||
| 670 | len = strlen(utsname()->nodename); | ||
| 671 | *e++ = len; | ||
| 672 | memcpy(e, utsname()->nodename, len); | ||
| 673 | e += len; | ||
| 674 | } | ||
| 660 | if (*vendor_class_identifier) { | 675 | if (*vendor_class_identifier) { |
| 661 | printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", | 676 | printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", |
| 662 | vendor_class_identifier); | 677 | vendor_class_identifier); |
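DHCP options are type/length/value triples, so the host-name hunk above appends three fields: tag 12, a single length byte, then the name bytes. A minimal sketch of that encoding; note the one-byte length caps the name at 255 bytes, which the kernel's utsname()->nodename is well under:

/* DHCP option 12 (host-name) encoding (sketch): */
static u8 *put_dhcp_hostname(u8 *e, const char *name)
{
        size_t len = strlen(name);      /* must be <= 255 for one length byte */

        *e++ = 12;                      /* option tag: host-name */
        *e++ = len;
        memcpy(e, name, len);
        return e + len;                 /* advance the options cursor */
}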
| @@ -968,7 +983,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
| 968 | /* Is it a reply for the device we are configuring? */ | 983 | /* Is it a reply for the device we are configuring? */ |
| 969 | if (b->xid != ic_dev_xid) { | 984 | if (b->xid != ic_dev_xid) { |
| 970 | if (net_ratelimit()) | 985 | if (net_ratelimit()) |
| 971 | printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); | 986 | printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n"); |
| 972 | goto drop_unlock; | 987 | goto drop_unlock; |
| 973 | } | 988 | } |
| 974 | 989 | ||
| @@ -1303,6 +1318,32 @@ __be32 __init root_nfs_parse_addr(char *name) | |||
| 1303 | return addr; | 1318 | return addr; |
| 1304 | } | 1319 | } |
| 1305 | 1320 | ||
| 1321 | #define DEVICE_WAIT_MAX 12 /* 12 seconds */ | ||
| 1322 | |||
| 1323 | static int __init wait_for_devices(void) | ||
| 1324 | { | ||
| 1325 | int i; | ||
| 1326 | |||
| 1327 | msleep(CONF_PRE_OPEN); | ||
| 1328 | for (i = 0; i < DEVICE_WAIT_MAX; i++) { | ||
| 1329 | struct net_device *dev; | ||
| 1330 | int found = 0; | ||
| 1331 | |||
| 1332 | rtnl_lock(); | ||
| 1333 | for_each_netdev(&init_net, dev) { | ||
| 1334 | if (ic_device_match(dev)) { | ||
| 1335 | found = 1; | ||
| 1336 | break; | ||
| 1337 | } | ||
| 1338 | } | ||
| 1339 | rtnl_unlock(); | ||
| 1340 | if (found) | ||
| 1341 | return 0; | ||
| 1342 | ssleep(1); | ||
| 1343 | } | ||
| 1344 | return -ENODEV; | ||
| 1345 | } | ||
| 1346 | |||
| 1306 | /* | 1347 | /* |
| 1307 | * IP Autoconfig dispatcher. | 1348 | * IP Autoconfig dispatcher. |
| 1308 | */ | 1349 | */ |
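wait_for_devices() keeps the CONF_PRE_OPEN settle delay but follows it with a bounded poll: probe the device list under the RTNL once per second, up to DEVICE_WAIT_MAX (12) attempts, succeeding on the first ic_device_match() hit. The reusable shape of that loop, as a sketch in which ready() stands in for the rtnl-locked for_each_netdev() scan:

/* Bounded poll with 1 s granularity (sketch): */
static int poll_until(bool (*ready)(void), int max_seconds)
{
        int i;

        for (i = 0; i < max_seconds; i++) {
                if (ready())
                        return 0;       /* a device showed up */
                ssleep(1);
        }
        return -ENODEV;                 /* nothing appeared in time */
}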
| @@ -1313,6 +1354,7 @@ static int __init ip_auto_config(void) | |||
| 1313 | #ifdef IPCONFIG_DYNAMIC | 1354 | #ifdef IPCONFIG_DYNAMIC |
| 1314 | int retries = CONF_OPEN_RETRIES; | 1355 | int retries = CONF_OPEN_RETRIES; |
| 1315 | #endif | 1356 | #endif |
| 1357 | int err; | ||
| 1316 | 1358 | ||
| 1317 | #ifdef CONFIG_PROC_FS | 1359 | #ifdef CONFIG_PROC_FS |
| 1318 | proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); | 1360 | proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); |
| @@ -1325,12 +1367,15 @@ static int __init ip_auto_config(void) | |||
| 1325 | #ifdef IPCONFIG_DYNAMIC | 1367 | #ifdef IPCONFIG_DYNAMIC |
| 1326 | try_try_again: | 1368 | try_try_again: |
| 1327 | #endif | 1369 | #endif |
| 1328 | /* Give hardware a chance to settle */ | 1370 | /* Wait for devices to appear */ |
| 1329 | msleep(CONF_PRE_OPEN); | 1371 | err = wait_for_devices(); |
| 1372 | if (err) | ||
| 1373 | return err; | ||
| 1330 | 1374 | ||
| 1331 | /* Setup all network devices */ | 1375 | /* Setup all network devices */ |
| 1332 | if (ic_open_devs() < 0) | 1376 | err = ic_open_devs(); |
| 1333 | return -1; | 1377 | if (err) |
| 1378 | return err; | ||
| 1334 | 1379 | ||
| 1335 | /* Give drivers a chance to settle */ | 1380 | /* Give drivers a chance to settle */ |
| 1336 | ssleep(CONF_POST_OPEN); | 1381 | ssleep(CONF_POST_OPEN); |
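The ip_auto_config() changes finish the error-code cleanup begun in ic_open_devs(): a bare -1 is numerically -EPERM, so callers and logs used to see a misleading "operation not permitted" when the real cause was an allocation failure or a missing device. With -ENOMEM/-ENODEV propagated, the failure is self-describing. A trivial demonstration (sketch):

/* Why -1 was a poor return value (sketch): */
static const char *ipconfig_err_str(int err)
{
        switch (err) {
        case -ENOMEM:   return "ic_device allocation failed";
        case -ENODEV:   return "no usable network device";
        case -1:        return "-EPERM -- the old, misleading code";
        default:        return "other";
        }
}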
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index eda04fed3379..ec036731a70b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
| @@ -95,6 +95,7 @@ | |||
| 95 | #include <linux/module.h> | 95 | #include <linux/module.h> |
| 96 | #include <linux/types.h> | 96 | #include <linux/types.h> |
| 97 | #include <linux/kernel.h> | 97 | #include <linux/kernel.h> |
| 98 | #include <linux/slab.h> | ||
| 98 | #include <asm/uaccess.h> | 99 | #include <asm/uaccess.h> |
| 99 | #include <linux/skbuff.h> | 100 | #include <linux/skbuff.h> |
| 100 | #include <linux/netdevice.h> | 101 | #include <linux/netdevice.h> |
| @@ -130,7 +131,6 @@ struct ipip_net { | |||
| 130 | struct net_device *fb_tunnel_dev; | 131 | struct net_device *fb_tunnel_dev; |
| 131 | }; | 132 | }; |
| 132 | 133 | ||
| 133 | static void ipip_fb_tunnel_init(struct net_device *dev); | ||
| 134 | static void ipip_tunnel_init(struct net_device *dev); | 134 | static void ipip_tunnel_init(struct net_device *dev); |
| 135 | static void ipip_tunnel_setup(struct net_device *dev); | 135 | static void ipip_tunnel_setup(struct net_device *dev); |
| 136 | 136 | ||
| @@ -374,11 +374,8 @@ static int ipip_rcv(struct sk_buff *skb) | |||
| 374 | skb->protocol = htons(ETH_P_IP); | 374 | skb->protocol = htons(ETH_P_IP); |
| 375 | skb->pkt_type = PACKET_HOST; | 375 | skb->pkt_type = PACKET_HOST; |
| 376 | 376 | ||
| 377 | tunnel->dev->stats.rx_packets++; | 377 | skb_tunnel_rx(skb, tunnel->dev); |
| 378 | tunnel->dev->stats.rx_bytes += skb->len; | 378 | |
| 379 | skb->dev = tunnel->dev; | ||
| 380 | skb_dst_drop(skb); | ||
| 381 | nf_reset(skb); | ||
| 382 | ipip_ecn_decapsulate(iph, skb); | 379 | ipip_ecn_decapsulate(iph, skb); |
| 383 | netif_rx(skb); | 380 | netif_rx(skb); |
| 384 | rcu_read_unlock(); | 381 | rcu_read_unlock(); |
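skb_tunnel_rx() bundles the receive-side bookkeeping that every tunnel driver used to open-code. Judging from the lines this hunk removes, its effect is roughly the sequence below; this is a sketch of the replaced code, not necessarily the helper's literal body:

/* Receive-side bookkeeping consolidated by skb_tunnel_rx() (sketch): */
static inline void tunnel_rx_bookkeeping(struct sk_buff *skb,
                                         struct net_device *tunnel_dev)
{
        tunnel_dev->stats.rx_packets++;         /* account on the tunnel dev */
        tunnel_dev->stats.rx_bytes += skb->len;
        skb->dev = tunnel_dev;                  /* re-home the skb */
        skb_dst_drop(skb);                      /* forget the outer route */
        nf_reset(skb);                          /* clear netfilter state */
}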
| @@ -438,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 438 | goto tx_error_icmp; | 435 | goto tx_error_icmp; |
| 439 | } | 436 | } |
| 440 | } | 437 | } |
| 441 | tdev = rt->u.dst.dev; | 438 | tdev = rt->dst.dev; |
| 442 | 439 | ||
| 443 | if (tdev == dev) { | 440 | if (tdev == dev) { |
| 444 | ip_rt_put(rt); | 441 | ip_rt_put(rt); |
| @@ -449,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 449 | df |= old_iph->frag_off & htons(IP_DF); | 446 | df |= old_iph->frag_off & htons(IP_DF); |
| 450 | 447 | ||
| 451 | if (df) { | 448 | if (df) { |
| 452 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); | 449 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); |
| 453 | 450 | ||
| 454 | if (mtu < 68) { | 451 | if (mtu < 68) { |
| 455 | stats->collisions++; | 452 | stats->collisions++; |
| @@ -506,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 506 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 503 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
| 507 | IPSKB_REROUTED); | 504 | IPSKB_REROUTED); |
| 508 | skb_dst_drop(skb); | 505 | skb_dst_drop(skb); |
| 509 | skb_dst_set(skb, &rt->u.dst); | 506 | skb_dst_set(skb, &rt->dst); |
| 510 | 507 | ||
| 511 | /* | 508 | /* |
| 512 | * Push down and install the IPIP header. | 509 | * Push down and install the IPIP header. |
| @@ -555,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
| 555 | .proto = IPPROTO_IPIP }; | 552 | .proto = IPPROTO_IPIP }; |
| 556 | struct rtable *rt; | 553 | struct rtable *rt; |
| 557 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 554 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
| 558 | tdev = rt->u.dst.dev; | 555 | tdev = rt->dst.dev; |
| 559 | ip_rt_put(rt); | 556 | ip_rt_put(rt); |
| 560 | } | 557 | } |
| 561 | dev->flags |= IFF_POINTOPOINT; | 558 | dev->flags |= IFF_POINTOPOINT; |
| @@ -730,7 +727,7 @@ static void ipip_tunnel_init(struct net_device *dev) | |||
| 730 | ipip_tunnel_bind_dev(dev); | 727 | ipip_tunnel_bind_dev(dev); |
| 731 | } | 728 | } |
| 732 | 729 | ||
| 733 | static void ipip_fb_tunnel_init(struct net_device *dev) | 730 | static void __net_init ipip_fb_tunnel_init(struct net_device *dev) |
| 734 | { | 731 | { |
| 735 | struct ip_tunnel *tunnel = netdev_priv(dev); | 732 | struct ip_tunnel *tunnel = netdev_priv(dev); |
| 736 | struct iphdr *iph = &tunnel->parms.iph; | 733 | struct iphdr *iph = &tunnel->parms.iph; |
| @@ -773,7 +770,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) | |||
| 773 | } | 770 | } |
| 774 | } | 771 | } |
| 775 | 772 | ||
| 776 | static int ipip_init_net(struct net *net) | 773 | static int __net_init ipip_init_net(struct net *net) |
| 777 | { | 774 | { |
| 778 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 775 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
| 779 | int err; | 776 | int err; |
| @@ -806,7 +803,7 @@ err_alloc_dev: | |||
| 806 | return err; | 803 | return err; |
| 807 | } | 804 | } |
| 808 | 805 | ||
| 809 | static void ipip_exit_net(struct net *net) | 806 | static void __net_exit ipip_exit_net(struct net *net) |
| 810 | { | 807 | { |
| 811 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 808 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
| 812 | LIST_HEAD(list); | 809 | LIST_HEAD(list); |
| @@ -831,15 +828,14 @@ static int __init ipip_init(void) | |||
| 831 | 828 | ||
| 832 | printk(banner); | 829 | printk(banner); |
| 833 | 830 | ||
| 834 | if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { | 831 | err = register_pernet_device(&ipip_net_ops); |
| 832 | if (err < 0) | ||
| 833 | return err; | ||
| 834 | err = xfrm4_tunnel_register(&ipip_handler, AF_INET); | ||
| 835 | if (err < 0) { | ||
| 836 | unregister_pernet_device(&ipip_net_ops); | ||
| 835 | printk(KERN_INFO "ipip init: can't register tunnel\n"); | 837 | printk(KERN_INFO "ipip init: can't register tunnel\n"); |
| 836 | return -EAGAIN; | ||
| 837 | } | 838 | } |
| 838 | |||
| 839 | err = register_pernet_device(&ipip_net_ops); | ||
| 840 | if (err) | ||
| 841 | xfrm4_tunnel_deregister(&ipip_handler, AF_INET); | ||
| 842 | |||
| 843 | return err; | 839 | return err; |
| 844 | } | 840 | } |
| 845 | 841 | ||
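The ipip_init() rework registers the pernet ops (which create the per-namespace fallback device) before hooking the xfrm tunnel handler, so no packet can be steered into the module before its state exists, and it unwinds the first step if the second fails. Condensed from the hunk above:

/* Init ordering: state first, then the packet hook; unwind on failure. */
static int __init ipip_init_sketch(void)
{
        int err = register_pernet_device(&ipip_net_ops);

        if (err < 0)
                return err;
        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
        if (err < 0)
                unregister_pernet_device(&ipip_net_ops);
        return err;
}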
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 54596f73eff5..179fcab866fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | * overflow. | 22 | * overflow. |
| 23 | * Carlos Picoto : PIMv1 Support | 23 | * Carlos Picoto : PIMv1 Support |
| 24 | * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header | 24 | * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header |
| 25 | * Relax this requrement to work with older peers. | 25 | * Relax this requirement to work with older peers. |
| 26 | * | 26 | * |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include <linux/mroute.h> | 47 | #include <linux/mroute.h> |
| 48 | #include <linux/init.h> | 48 | #include <linux/init.h> |
| 49 | #include <linux/if_ether.h> | 49 | #include <linux/if_ether.h> |
| 50 | #include <linux/slab.h> | ||
| 50 | #include <net/net_namespace.h> | 51 | #include <net/net_namespace.h> |
| 51 | #include <net/ip.h> | 52 | #include <net/ip.h> |
| 52 | #include <net/protocol.h> | 53 | #include <net/protocol.h> |
| @@ -62,11 +63,40 @@ | |||
| 62 | #include <net/ipip.h> | 63 | #include <net/ipip.h> |
| 63 | #include <net/checksum.h> | 64 | #include <net/checksum.h> |
| 64 | #include <net/netlink.h> | 65 | #include <net/netlink.h> |
| 66 | #include <net/fib_rules.h> | ||
| 65 | 67 | ||
| 66 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) | 68 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) |
| 67 | #define CONFIG_IP_PIMSM 1 | 69 | #define CONFIG_IP_PIMSM 1 |
| 68 | #endif | 70 | #endif |
| 69 | 71 | ||
| 72 | struct mr_table { | ||
| 73 | struct list_head list; | ||
| 74 | #ifdef CONFIG_NET_NS | ||
| 75 | struct net *net; | ||
| 76 | #endif | ||
| 77 | u32 id; | ||
| 78 | struct sock *mroute_sk; | ||
| 79 | struct timer_list ipmr_expire_timer; | ||
| 80 | struct list_head mfc_unres_queue; | ||
| 81 | struct list_head mfc_cache_array[MFC_LINES]; | ||
| 82 | struct vif_device vif_table[MAXVIFS]; | ||
| 83 | int maxvif; | ||
| 84 | atomic_t cache_resolve_queue_len; | ||
| 85 | int mroute_do_assert; | ||
| 86 | int mroute_do_pim; | ||
| 87 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) | ||
| 88 | int mroute_reg_vif_num; | ||
| 89 | #endif | ||
| 90 | }; | ||
| 91 | |||
| 92 | struct ipmr_rule { | ||
| 93 | struct fib_rule common; | ||
| 94 | }; | ||
| 95 | |||
| 96 | struct ipmr_result { | ||
| 97 | struct mr_table *mrt; | ||
| 98 | }; | ||
| 99 | |||
| 70 | /* Big lock, protecting vif table, mrt cache and mroute socket state. | 100 | /* Big lock, protecting vif table, mrt cache and mroute socket state. |
| 71 | Note that the changes are semaphored via rtnl_lock. | 101 | Note that the changes are semaphored via rtnl_lock. |
| 72 | */ | 102 | */ |
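struct mr_table gathers into one per-table object everything that was previously a file-scope global (mfc_unres_queue, ipmr_expire_timer) or hung off struct netns_ipv4 (mroute_sk, vif_table, mfc_cache_array, mroute_reg_vif_num, and so on). Every access now goes through a table handle, e.g. this sketch mirroring the reworked VIF_EXISTS() below:

/* Per-table state access (sketch): */
static bool vif_exists_sketch(struct mr_table *mrt, int vifi)
{
        /* old: net->ipv4.vif_table[vifi].dev != NULL */
        return vifi >= 0 && vifi < mrt->maxvif &&
               mrt->vif_table[vifi].dev != NULL;
}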
| @@ -77,9 +107,7 @@ static DEFINE_RWLOCK(mrt_lock); | |||
| 77 | * Multicast router control variables | 107 | * Multicast router control variables |
| 78 | */ | 108 | */ |
| 79 | 109 | ||
| 80 | #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) | 110 | #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) |
| 81 | |||
| 82 | static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ | ||
| 83 | 111 | ||
| 84 | /* Special spinlock for queue of unresolved entries */ | 112 | /* Special spinlock for queue of unresolved entries */ |
| 85 | static DEFINE_SPINLOCK(mfc_unres_lock); | 113 | static DEFINE_SPINLOCK(mfc_unres_lock); |
| @@ -94,12 +122,217 @@ static DEFINE_SPINLOCK(mfc_unres_lock); | |||
| 94 | 122 | ||
| 95 | static struct kmem_cache *mrt_cachep __read_mostly; | 123 | static struct kmem_cache *mrt_cachep __read_mostly; |
| 96 | 124 | ||
| 97 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); | 125 | static struct mr_table *ipmr_new_table(struct net *net, u32 id); |
| 98 | static int ipmr_cache_report(struct net *net, | 126 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, |
| 127 | struct sk_buff *skb, struct mfc_cache *cache, | ||
| 128 | int local); | ||
| 129 | static int ipmr_cache_report(struct mr_table *mrt, | ||
| 99 | struct sk_buff *pkt, vifi_t vifi, int assert); | 130 | struct sk_buff *pkt, vifi_t vifi, int assert); |
| 100 | static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); | 131 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
| 132 | struct mfc_cache *c, struct rtmsg *rtm); | ||
| 133 | static void ipmr_expire_process(unsigned long arg); | ||
| 134 | |||
| 135 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | ||
| 136 | #define ipmr_for_each_table(mrt, net) \ | ||
| 137 | list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) | ||
| 138 | |||
| 139 | static struct mr_table *ipmr_get_table(struct net *net, u32 id) | ||
| 140 | { | ||
| 141 | struct mr_table *mrt; | ||
| 142 | |||
| 143 | ipmr_for_each_table(mrt, net) { | ||
| 144 | if (mrt->id == id) | ||
| 145 | return mrt; | ||
| 146 | } | ||
| 147 | return NULL; | ||
| 148 | } | ||
| 149 | |||
| 150 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | ||
| 151 | struct mr_table **mrt) | ||
| 152 | { | ||
| 153 | struct ipmr_result res; | ||
| 154 | struct fib_lookup_arg arg = { .result = &res, }; | ||
| 155 | int err; | ||
| 156 | |||
| 157 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); | ||
| 158 | if (err < 0) | ||
| 159 | return err; | ||
| 160 | *mrt = res.mrt; | ||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | |||
| 164 | static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, | ||
| 165 | int flags, struct fib_lookup_arg *arg) | ||
| 166 | { | ||
| 167 | struct ipmr_result *res = arg->result; | ||
| 168 | struct mr_table *mrt; | ||
| 169 | |||
| 170 | switch (rule->action) { | ||
| 171 | case FR_ACT_TO_TBL: | ||
| 172 | break; | ||
| 173 | case FR_ACT_UNREACHABLE: | ||
| 174 | return -ENETUNREACH; | ||
| 175 | case FR_ACT_PROHIBIT: | ||
| 176 | return -EACCES; | ||
| 177 | case FR_ACT_BLACKHOLE: | ||
| 178 | default: | ||
| 179 | return -EINVAL; | ||
| 180 | } | ||
| 181 | |||
| 182 | mrt = ipmr_get_table(rule->fr_net, rule->table); | ||
| 183 | if (mrt == NULL) | ||
| 184 | return -EAGAIN; | ||
| 185 | res->mrt = mrt; | ||
| 186 | return 0; | ||
| 187 | } | ||
| 188 | |||
| 189 | static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) | ||
| 190 | { | ||
| 191 | return 1; | ||
| 192 | } | ||
| 193 | |||
| 194 | static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { | ||
| 195 | FRA_GENERIC_POLICY, | ||
| 196 | }; | ||
| 197 | |||
| 198 | static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | ||
| 199 | struct fib_rule_hdr *frh, struct nlattr **tb) | ||
| 200 | { | ||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | ||
| 205 | struct nlattr **tb) | ||
| 206 | { | ||
| 207 | return 1; | ||
| 208 | } | ||
| 209 | |||
| 210 | static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | ||
| 211 | struct fib_rule_hdr *frh) | ||
| 212 | { | ||
| 213 | frh->dst_len = 0; | ||
| 214 | frh->src_len = 0; | ||
| 215 | frh->tos = 0; | ||
| 216 | return 0; | ||
| 217 | } | ||
| 101 | 218 | ||
| 102 | static struct timer_list ipmr_expire_timer; | 219 | static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { |
| 220 | .family = RTNL_FAMILY_IPMR, | ||
| 221 | .rule_size = sizeof(struct ipmr_rule), | ||
| 222 | .addr_size = sizeof(u32), | ||
| 223 | .action = ipmr_rule_action, | ||
| 224 | .match = ipmr_rule_match, | ||
| 225 | .configure = ipmr_rule_configure, | ||
| 226 | .compare = ipmr_rule_compare, | ||
| 227 | .default_pref = fib_default_rule_pref, | ||
| 228 | .fill = ipmr_rule_fill, | ||
| 229 | .nlgroup = RTNLGRP_IPV4_RULE, | ||
| 230 | .policy = ipmr_rule_policy, | ||
| 231 | .owner = THIS_MODULE, | ||
| 232 | }; | ||
| 233 | |||
| 234 | static int __net_init ipmr_rules_init(struct net *net) | ||
| 235 | { | ||
| 236 | struct fib_rules_ops *ops; | ||
| 237 | struct mr_table *mrt; | ||
| 238 | int err; | ||
| 239 | |||
| 240 | ops = fib_rules_register(&ipmr_rules_ops_template, net); | ||
| 241 | if (IS_ERR(ops)) | ||
| 242 | return PTR_ERR(ops); | ||
| 243 | |||
| 244 | INIT_LIST_HEAD(&net->ipv4.mr_tables); | ||
| 245 | |||
| 246 | mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); | ||
| 247 | if (mrt == NULL) { | ||
| 248 | err = -ENOMEM; | ||
| 249 | goto err1; | ||
| 250 | } | ||
| 251 | |||
| 252 | err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); | ||
| 253 | if (err < 0) | ||
| 254 | goto err2; | ||
| 255 | |||
| 256 | net->ipv4.mr_rules_ops = ops; | ||
| 257 | return 0; | ||
| 258 | |||
| 259 | err2: | ||
| 260 | kfree(mrt); | ||
| 261 | err1: | ||
| 262 | fib_rules_unregister(ops); | ||
| 263 | return err; | ||
| 264 | } | ||
| 265 | |||
| 266 | static void __net_exit ipmr_rules_exit(struct net *net) | ||
| 267 | { | ||
| 268 | struct mr_table *mrt, *next; | ||
| 269 | |||
| 270 | list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { | ||
| 271 | list_del(&mrt->list); | ||
| 272 | kfree(mrt); | ||
| 273 | } | ||
| 274 | fib_rules_unregister(net->ipv4.mr_rules_ops); | ||
| 275 | } | ||
| 276 | #else | ||
| 277 | #define ipmr_for_each_table(mrt, net) \ | ||
| 278 | for (mrt = net->ipv4.mrt; mrt; mrt = NULL) | ||
| 279 | |||
| 280 | static struct mr_table *ipmr_get_table(struct net *net, u32 id) | ||
| 281 | { | ||
| 282 | return net->ipv4.mrt; | ||
| 283 | } | ||
| 284 | |||
| 285 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | ||
| 286 | struct mr_table **mrt) | ||
| 287 | { | ||
| 288 | *mrt = net->ipv4.mrt; | ||
| 289 | return 0; | ||
| 290 | } | ||
| 291 | |||
| 292 | static int __net_init ipmr_rules_init(struct net *net) | ||
| 293 | { | ||
| 294 | net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); | ||
| 295 | return net->ipv4.mrt ? 0 : -ENOMEM; | ||
| 296 | } | ||
| 297 | |||
| 298 | static void __net_exit ipmr_rules_exit(struct net *net) | ||
| 299 | { | ||
| 300 | kfree(net->ipv4.mrt); | ||
| 301 | } | ||
| 302 | #endif | ||
| 303 | |||
| 304 | static struct mr_table *ipmr_new_table(struct net *net, u32 id) | ||
| 305 | { | ||
| 306 | struct mr_table *mrt; | ||
| 307 | unsigned int i; | ||
| 308 | |||
| 309 | mrt = ipmr_get_table(net, id); | ||
| 310 | if (mrt != NULL) | ||
| 311 | return mrt; | ||
| 312 | |||
| 313 | mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); | ||
| 314 | if (mrt == NULL) | ||
| 315 | return NULL; | ||
| 316 | write_pnet(&mrt->net, net); | ||
| 317 | mrt->id = id; | ||
| 318 | |||
| 319 | /* Forwarding cache */ | ||
| 320 | for (i = 0; i < MFC_LINES; i++) | ||
| 321 | INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); | ||
| 322 | |||
| 323 | INIT_LIST_HEAD(&mrt->mfc_unres_queue); | ||
| 324 | |||
| 325 | setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, | ||
| 326 | (unsigned long)mrt); | ||
| 327 | |||
| 328 | #ifdef CONFIG_IP_PIMSM | ||
| 329 | mrt->mroute_reg_vif_num = -1; | ||
| 330 | #endif | ||
| 331 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | ||
| 332 | list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); | ||
| 333 | #endif | ||
| 334 | return mrt; | ||
| 335 | } | ||
| 103 | 336 | ||
| 104 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ | 337 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ |
| 105 | 338 | ||
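With the fib-rules plumbing above in place, resolving a packet to its mr_table means filling a flow key with the attributes the rules can match on (interfaces and the packet mark) and calling ipmr_fib_lookup(); in the single-table build the stub simply hands back net->ipv4.mrt. The xmit path in the next hunk does exactly this; isolated, it looks like:

/* Pick the table for a packet, as reg_vif_xmit() below does (sketch): */
static struct mr_table *table_for_skb(struct net *net, struct sk_buff *skb,
                                      struct net_device *dev)
{
        struct mr_table *mrt;
        struct flowi fl = {
                .oif  = dev->ifindex,   /* rules may match interfaces... */
                .iif  = skb->skb_iif,
                .mark = skb->mark,      /* ...and the packet mark */
        };

        if (ipmr_fib_lookup(net, &fl, &mrt) < 0)
                return NULL;
        return mrt;
}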
| @@ -200,12 +433,24 @@ failure: | |||
| 200 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | 433 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) |
| 201 | { | 434 | { |
| 202 | struct net *net = dev_net(dev); | 435 | struct net *net = dev_net(dev); |
| 436 | struct mr_table *mrt; | ||
| 437 | struct flowi fl = { | ||
| 438 | .oif = dev->ifindex, | ||
| 439 | .iif = skb->skb_iif, | ||
| 440 | .mark = skb->mark, | ||
| 441 | }; | ||
| 442 | int err; | ||
| 443 | |||
| 444 | err = ipmr_fib_lookup(net, &fl, &mrt); | ||
| 445 | if (err < 0) { | ||
| 446 | kfree_skb(skb); | ||
| 447 | return err; | ||
| 448 | } | ||
| 203 | 449 | ||
| 204 | read_lock(&mrt_lock); | 450 | read_lock(&mrt_lock); |
| 205 | dev->stats.tx_bytes += skb->len; | 451 | dev->stats.tx_bytes += skb->len; |
| 206 | dev->stats.tx_packets++; | 452 | dev->stats.tx_packets++; |
| 207 | ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, | 453 | ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); |
| 208 | IGMPMSG_WHOLEPKT); | ||
| 209 | read_unlock(&mrt_lock); | 454 | read_unlock(&mrt_lock); |
| 210 | kfree_skb(skb); | 455 | kfree_skb(skb); |
| 211 | return NETDEV_TX_OK; | 456 | return NETDEV_TX_OK; |
| @@ -225,12 +470,18 @@ static void reg_vif_setup(struct net_device *dev) | |||
| 225 | dev->features |= NETIF_F_NETNS_LOCAL; | 470 | dev->features |= NETIF_F_NETNS_LOCAL; |
| 226 | } | 471 | } |
| 227 | 472 | ||
| 228 | static struct net_device *ipmr_reg_vif(struct net *net) | 473 | static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) |
| 229 | { | 474 | { |
| 230 | struct net_device *dev; | 475 | struct net_device *dev; |
| 231 | struct in_device *in_dev; | 476 | struct in_device *in_dev; |
| 477 | char name[IFNAMSIZ]; | ||
| 478 | |||
| 479 | if (mrt->id == RT_TABLE_DEFAULT) | ||
| 480 | sprintf(name, "pimreg"); | ||
| 481 | else | ||
| 482 | sprintf(name, "pimreg%u", mrt->id); | ||
| 232 | 483 | ||
| 233 | dev = alloc_netdev(0, "pimreg", reg_vif_setup); | 484 | dev = alloc_netdev(0, name, reg_vif_setup); |
| 234 | 485 | ||
| 235 | if (dev == NULL) | 486 | if (dev == NULL) |
| 236 | return NULL; | 487 | return NULL; |
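Each table now gets its own PIM register device: the default table keeps the historical "pimreg" name, other tables become "pimreg<id>". The naming rule, as a sketch that assumes the id keeps the result within IFNAMSIZ:

/* PIM register device naming (sketch): */
static void pimreg_name(char name[IFNAMSIZ], u32 table_id)
{
        if (table_id == RT_TABLE_DEFAULT)
                sprintf(name, "pimreg");                /* table 253 */
        else
                sprintf(name, "pimreg%u", table_id);    /* e.g. "pimreg100" */
}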
| @@ -275,17 +526,17 @@ failure: | |||
| 275 | * @notify: Set to 1, if the caller is a notifier_call | 526 | * @notify: Set to 1, if the caller is a notifier_call |
| 276 | */ | 527 | */ |
| 277 | 528 | ||
| 278 | static int vif_delete(struct net *net, int vifi, int notify, | 529 | static int vif_delete(struct mr_table *mrt, int vifi, int notify, |
| 279 | struct list_head *head) | 530 | struct list_head *head) |
| 280 | { | 531 | { |
| 281 | struct vif_device *v; | 532 | struct vif_device *v; |
| 282 | struct net_device *dev; | 533 | struct net_device *dev; |
| 283 | struct in_device *in_dev; | 534 | struct in_device *in_dev; |
| 284 | 535 | ||
| 285 | if (vifi < 0 || vifi >= net->ipv4.maxvif) | 536 | if (vifi < 0 || vifi >= mrt->maxvif) |
| 286 | return -EADDRNOTAVAIL; | 537 | return -EADDRNOTAVAIL; |
| 287 | 538 | ||
| 288 | v = &net->ipv4.vif_table[vifi]; | 539 | v = &mrt->vif_table[vifi]; |
| 289 | 540 | ||
| 290 | write_lock_bh(&mrt_lock); | 541 | write_lock_bh(&mrt_lock); |
| 291 | dev = v->dev; | 542 | dev = v->dev; |
| @@ -297,17 +548,17 @@ static int vif_delete(struct net *net, int vifi, int notify, | |||
| 297 | } | 548 | } |
| 298 | 549 | ||
| 299 | #ifdef CONFIG_IP_PIMSM | 550 | #ifdef CONFIG_IP_PIMSM |
| 300 | if (vifi == net->ipv4.mroute_reg_vif_num) | 551 | if (vifi == mrt->mroute_reg_vif_num) |
| 301 | net->ipv4.mroute_reg_vif_num = -1; | 552 | mrt->mroute_reg_vif_num = -1; |
| 302 | #endif | 553 | #endif |
| 303 | 554 | ||
| 304 | if (vifi+1 == net->ipv4.maxvif) { | 555 | if (vifi+1 == mrt->maxvif) { |
| 305 | int tmp; | 556 | int tmp; |
| 306 | for (tmp=vifi-1; tmp>=0; tmp--) { | 557 | for (tmp=vifi-1; tmp>=0; tmp--) { |
| 307 | if (VIF_EXISTS(net, tmp)) | 558 | if (VIF_EXISTS(mrt, tmp)) |
| 308 | break; | 559 | break; |
| 309 | } | 560 | } |
| 310 | net->ipv4.maxvif = tmp+1; | 561 | mrt->maxvif = tmp+1; |
| 311 | } | 562 | } |
| 312 | 563 | ||
| 313 | write_unlock_bh(&mrt_lock); | 564 | write_unlock_bh(&mrt_lock); |
| @@ -328,7 +579,6 @@ static int vif_delete(struct net *net, int vifi, int notify, | |||
| 328 | 579 | ||
| 329 | static inline void ipmr_cache_free(struct mfc_cache *c) | 580 | static inline void ipmr_cache_free(struct mfc_cache *c) |
| 330 | { | 581 | { |
| 331 | release_net(mfc_net(c)); | ||
| 332 | kmem_cache_free(mrt_cachep, c); | 582 | kmem_cache_free(mrt_cachep, c); |
| 333 | } | 583 | } |
| 334 | 584 | ||
| @@ -336,13 +586,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c) | |||
| 336 | and reporting error to netlink readers. | 586 | and reporting error to netlink readers. |
| 337 | */ | 587 | */ |
| 338 | 588 | ||
| 339 | static void ipmr_destroy_unres(struct mfc_cache *c) | 589 | static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) |
| 340 | { | 590 | { |
| 591 | struct net *net = read_pnet(&mrt->net); | ||
| 341 | struct sk_buff *skb; | 592 | struct sk_buff *skb; |
| 342 | struct nlmsgerr *e; | 593 | struct nlmsgerr *e; |
| 343 | struct net *net = mfc_net(c); | ||
| 344 | 594 | ||
| 345 | atomic_dec(&net->ipv4.cache_resolve_queue_len); | 595 | atomic_dec(&mrt->cache_resolve_queue_len); |
| 346 | 596 | ||
| 347 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { | 597 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { |
| 348 | if (ip_hdr(skb)->version == 0) { | 598 | if (ip_hdr(skb)->version == 0) { |
| @@ -363,42 +613,40 @@ static void ipmr_destroy_unres(struct mfc_cache *c) | |||
| 363 | } | 613 | } |
| 364 | 614 | ||
| 365 | 615 | ||
| 366 | /* Single timer process for all the unresolved queue. */ | 616 | /* Timer process for the unresolved queue. */ |
| 367 | 617 | ||
| 368 | static void ipmr_expire_process(unsigned long dummy) | 618 | static void ipmr_expire_process(unsigned long arg) |
| 369 | { | 619 | { |
| 620 | struct mr_table *mrt = (struct mr_table *)arg; | ||
| 370 | unsigned long now; | 621 | unsigned long now; |
| 371 | unsigned long expires; | 622 | unsigned long expires; |
| 372 | struct mfc_cache *c, **cp; | 623 | struct mfc_cache *c, *next; |
| 373 | 624 | ||
| 374 | if (!spin_trylock(&mfc_unres_lock)) { | 625 | if (!spin_trylock(&mfc_unres_lock)) { |
| 375 | mod_timer(&ipmr_expire_timer, jiffies+HZ/10); | 626 | mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); |
| 376 | return; | 627 | return; |
| 377 | } | 628 | } |
| 378 | 629 | ||
| 379 | if (mfc_unres_queue == NULL) | 630 | if (list_empty(&mrt->mfc_unres_queue)) |
| 380 | goto out; | 631 | goto out; |
| 381 | 632 | ||
| 382 | now = jiffies; | 633 | now = jiffies; |
| 383 | expires = 10*HZ; | 634 | expires = 10*HZ; |
| 384 | cp = &mfc_unres_queue; | ||
| 385 | 635 | ||
| 386 | while ((c=*cp) != NULL) { | 636 | list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { |
| 387 | if (time_after(c->mfc_un.unres.expires, now)) { | 637 | if (time_after(c->mfc_un.unres.expires, now)) { |
| 388 | unsigned long interval = c->mfc_un.unres.expires - now; | 638 | unsigned long interval = c->mfc_un.unres.expires - now; |
| 389 | if (interval < expires) | 639 | if (interval < expires) |
| 390 | expires = interval; | 640 | expires = interval; |
| 391 | cp = &c->next; | ||
| 392 | continue; | 641 | continue; |
| 393 | } | 642 | } |
| 394 | 643 | ||
| 395 | *cp = c->next; | 644 | list_del(&c->list); |
| 396 | 645 | ipmr_destroy_unres(mrt, c); | |
| 397 | ipmr_destroy_unres(c); | ||
| 398 | } | 646 | } |
| 399 | 647 | ||
| 400 | if (mfc_unres_queue != NULL) | 648 | if (!list_empty(&mrt->mfc_unres_queue)) |
| 401 | mod_timer(&ipmr_expire_timer, jiffies + expires); | 649 | mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); |
| 402 | 650 | ||
| 403 | out: | 651 | out: |
| 404 | spin_unlock(&mfc_unres_lock); | 652 | spin_unlock(&mfc_unres_lock); |
| @@ -406,17 +654,17 @@ out: | |||
| 406 | 654 | ||
| 407 | /* Fill oifs list. It is called under write locked mrt_lock. */ | 655 | /* Fill oifs list. It is called under write locked mrt_lock. */ |
| 408 | 656 | ||
| 409 | static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) | 657 | static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, |
| 658 | unsigned char *ttls) | ||
| 410 | { | 659 | { |
| 411 | int vifi; | 660 | int vifi; |
| 412 | struct net *net = mfc_net(cache); | ||
| 413 | 661 | ||
| 414 | cache->mfc_un.res.minvif = MAXVIFS; | 662 | cache->mfc_un.res.minvif = MAXVIFS; |
| 415 | cache->mfc_un.res.maxvif = 0; | 663 | cache->mfc_un.res.maxvif = 0; |
| 416 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); | 664 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); |
| 417 | 665 | ||
| 418 | for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { | 666 | for (vifi = 0; vifi < mrt->maxvif; vifi++) { |
| 419 | if (VIF_EXISTS(net, vifi) && | 667 | if (VIF_EXISTS(mrt, vifi) && |
| 420 | ttls[vifi] && ttls[vifi] < 255) { | 668 | ttls[vifi] && ttls[vifi] < 255) { |
| 421 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; | 669 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; |
| 422 | if (cache->mfc_un.res.minvif > vifi) | 670 | if (cache->mfc_un.res.minvif > vifi) |
| @@ -427,16 +675,17 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) | |||
| 427 | } | 675 | } |
| 428 | } | 676 | } |
| 429 | 677 | ||
| 430 | static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) | 678 | static int vif_add(struct net *net, struct mr_table *mrt, |
| 679 | struct vifctl *vifc, int mrtsock) | ||
| 431 | { | 680 | { |
| 432 | int vifi = vifc->vifc_vifi; | 681 | int vifi = vifc->vifc_vifi; |
| 433 | struct vif_device *v = &net->ipv4.vif_table[vifi]; | 682 | struct vif_device *v = &mrt->vif_table[vifi]; |
| 434 | struct net_device *dev; | 683 | struct net_device *dev; |
| 435 | struct in_device *in_dev; | 684 | struct in_device *in_dev; |
| 436 | int err; | 685 | int err; |
| 437 | 686 | ||
| 438 | /* Is vif busy ? */ | 687 | /* Is vif busy ? */ |
| 439 | if (VIF_EXISTS(net, vifi)) | 688 | if (VIF_EXISTS(mrt, vifi)) |
| 440 | return -EADDRINUSE; | 689 | return -EADDRINUSE; |
| 441 | 690 | ||
| 442 | switch (vifc->vifc_flags) { | 691 | switch (vifc->vifc_flags) { |
| @@ -446,9 +695,9 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) | |||
| 446 | * Special Purpose VIF in PIM | 695 | * Special Purpose VIF in PIM |
| 447 | * All the packets will be sent to the daemon | 696 | * All the packets will be sent to the daemon |
| 448 | */ | 697 | */ |
| 449 | if (net->ipv4.mroute_reg_vif_num >= 0) | 698 | if (mrt->mroute_reg_vif_num >= 0) |
| 450 | return -EADDRINUSE; | 699 | return -EADDRINUSE; |
| 451 | dev = ipmr_reg_vif(net); | 700 | dev = ipmr_reg_vif(net, mrt); |
| 452 | if (!dev) | 701 | if (!dev) |
| 453 | return -ENOBUFS; | 702 | return -ENOBUFS; |
| 454 | err = dev_set_allmulti(dev, 1); | 703 | err = dev_set_allmulti(dev, 1); |
| @@ -524,49 +773,47 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) | |||
| 524 | v->dev = dev; | 773 | v->dev = dev; |
| 525 | #ifdef CONFIG_IP_PIMSM | 774 | #ifdef CONFIG_IP_PIMSM |
| 526 | if (v->flags&VIFF_REGISTER) | 775 | if (v->flags&VIFF_REGISTER) |
| 527 | net->ipv4.mroute_reg_vif_num = vifi; | 776 | mrt->mroute_reg_vif_num = vifi; |
| 528 | #endif | 777 | #endif |
| 529 | if (vifi+1 > net->ipv4.maxvif) | 778 | if (vifi+1 > mrt->maxvif) |
| 530 | net->ipv4.maxvif = vifi+1; | 779 | mrt->maxvif = vifi+1; |
| 531 | write_unlock_bh(&mrt_lock); | 780 | write_unlock_bh(&mrt_lock); |
| 532 | return 0; | 781 | return 0; |
| 533 | } | 782 | } |
| 534 | 783 | ||
| 535 | static struct mfc_cache *ipmr_cache_find(struct net *net, | 784 | static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, |
| 536 | __be32 origin, | 785 | __be32 origin, |
| 537 | __be32 mcastgrp) | 786 | __be32 mcastgrp) |
| 538 | { | 787 | { |
| 539 | int line = MFC_HASH(mcastgrp, origin); | 788 | int line = MFC_HASH(mcastgrp, origin); |
| 540 | struct mfc_cache *c; | 789 | struct mfc_cache *c; |
| 541 | 790 | ||
| 542 | for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { | 791 | list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { |
| 543 | if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) | 792 | if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) |
| 544 | break; | 793 | return c; |
| 545 | } | 794 | } |
| 546 | return c; | 795 | return NULL; |
| 547 | } | 796 | } |
| 548 | 797 | ||
| 549 | /* | 798 | /* |
| 550 | * Allocate a multicast cache entry | 799 | * Allocate a multicast cache entry |
| 551 | */ | 800 | */ |
| 552 | static struct mfc_cache *ipmr_cache_alloc(struct net *net) | 801 | static struct mfc_cache *ipmr_cache_alloc(void) |
| 553 | { | 802 | { |
| 554 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); | 803 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); |
| 555 | if (c == NULL) | 804 | if (c == NULL) |
| 556 | return NULL; | 805 | return NULL; |
| 557 | c->mfc_un.res.minvif = MAXVIFS; | 806 | c->mfc_un.res.minvif = MAXVIFS; |
| 558 | mfc_net_set(c, net); | ||
| 559 | return c; | 807 | return c; |
| 560 | } | 808 | } |
| 561 | 809 | ||
| 562 | static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) | 810 | static struct mfc_cache *ipmr_cache_alloc_unres(void) |
| 563 | { | 811 | { |
| 564 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); | 812 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); |
| 565 | if (c == NULL) | 813 | if (c == NULL) |
| 566 | return NULL; | 814 | return NULL; |
| 567 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 815 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
| 568 | c->mfc_un.unres.expires = jiffies + 10*HZ; | 816 | c->mfc_un.unres.expires = jiffies + 10*HZ; |
| 569 | mfc_net_set(c, net); | ||
| 570 | return c; | 817 | return c; |
| 571 | } | 818 | } |
| 572 | 819 | ||
| @@ -574,7 +821,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) | |||
| 574 | * A cache entry has gone into a resolved state from queued | 821 | * A cache entry has gone into a resolved state from queued |
| 575 | */ | 822 | */ |
| 576 | 823 | ||
| 577 | static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | 824 | static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, |
| 825 | struct mfc_cache *uc, struct mfc_cache *c) | ||
| 578 | { | 826 | { |
| 579 | struct sk_buff *skb; | 827 | struct sk_buff *skb; |
| 580 | struct nlmsgerr *e; | 828 | struct nlmsgerr *e; |
| @@ -587,7 +835,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
| 587 | if (ip_hdr(skb)->version == 0) { | 835 | if (ip_hdr(skb)->version == 0) { |
| 588 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 836 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
| 589 | 837 | ||
| 590 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { | 838 | if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { |
| 591 | nlh->nlmsg_len = (skb_tail_pointer(skb) - | 839 | nlh->nlmsg_len = (skb_tail_pointer(skb) - |
| 592 | (u8 *)nlh); | 840 | (u8 *)nlh); |
| 593 | } else { | 841 | } else { |
| @@ -599,9 +847,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
| 599 | memset(&e->msg, 0, sizeof(e->msg)); | 847 | memset(&e->msg, 0, sizeof(e->msg)); |
| 600 | } | 848 | } |
| 601 | 849 | ||
| 602 | rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); | 850 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); |
| 603 | } else | 851 | } else |
| 604 | ip_mr_forward(skb, c, 0); | 852 | ip_mr_forward(net, mrt, skb, c, 0); |
| 605 | } | 853 | } |
| 606 | } | 854 | } |
| 607 | 855 | ||
| @@ -612,7 +860,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
| 612 | * Called under mrt_lock. | 860 | * Called under mrt_lock. |
| 613 | */ | 861 | */ |
| 614 | 862 | ||
| 615 | static int ipmr_cache_report(struct net *net, | 863 | static int ipmr_cache_report(struct mr_table *mrt, |
| 616 | struct sk_buff *pkt, vifi_t vifi, int assert) | 864 | struct sk_buff *pkt, vifi_t vifi, int assert) |
| 617 | { | 865 | { |
| 618 | struct sk_buff *skb; | 866 | struct sk_buff *skb; |
| @@ -645,7 +893,7 @@ static int ipmr_cache_report(struct net *net, | |||
| 645 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); | 893 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); |
| 646 | msg->im_msgtype = IGMPMSG_WHOLEPKT; | 894 | msg->im_msgtype = IGMPMSG_WHOLEPKT; |
| 647 | msg->im_mbz = 0; | 895 | msg->im_mbz = 0; |
| 648 | msg->im_vif = net->ipv4.mroute_reg_vif_num; | 896 | msg->im_vif = mrt->mroute_reg_vif_num; |
| 649 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; | 897 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; |
| 650 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + | 898 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + |
| 651 | sizeof(struct iphdr)); | 899 | sizeof(struct iphdr)); |
| @@ -677,7 +925,7 @@ static int ipmr_cache_report(struct net *net, | |||
| 677 | skb->transport_header = skb->network_header; | 925 | skb->transport_header = skb->network_header; |
| 678 | } | 926 | } |
| 679 | 927 | ||
| 680 | if (net->ipv4.mroute_sk == NULL) { | 928 | if (mrt->mroute_sk == NULL) { |
| 681 | kfree_skb(skb); | 929 | kfree_skb(skb); |
| 682 | return -EINVAL; | 930 | return -EINVAL; |
| 683 | } | 931 | } |
| @@ -685,7 +933,7 @@ static int ipmr_cache_report(struct net *net, | |||
| 685 | /* | 933 | /* |
| 686 | * Deliver to mrouted | 934 | * Deliver to mrouted |
| 687 | */ | 935 | */ |
| 688 | ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); | 936 | ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); |
| 689 | if (ret < 0) { | 937 | if (ret < 0) { |
| 690 | if (net_ratelimit()) | 938 | if (net_ratelimit()) |
| 691 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); | 939 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); |
| @@ -700,27 +948,29 @@ static int ipmr_cache_report(struct net *net, | |||
| 700 | */ | 948 | */ |
| 701 | 949 | ||
| 702 | static int | 950 | static int |
| 703 | ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) | 951 | ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) |
| 704 | { | 952 | { |
| 953 | bool found = false; | ||
| 705 | int err; | 954 | int err; |
| 706 | struct mfc_cache *c; | 955 | struct mfc_cache *c; |
| 707 | const struct iphdr *iph = ip_hdr(skb); | 956 | const struct iphdr *iph = ip_hdr(skb); |
| 708 | 957 | ||
| 709 | spin_lock_bh(&mfc_unres_lock); | 958 | spin_lock_bh(&mfc_unres_lock); |
| 710 | for (c=mfc_unres_queue; c; c=c->next) { | 959 | list_for_each_entry(c, &mrt->mfc_unres_queue, list) { |
| 711 | if (net_eq(mfc_net(c), net) && | 960 | if (c->mfc_mcastgrp == iph->daddr && |
| 712 | c->mfc_mcastgrp == iph->daddr && | 961 | c->mfc_origin == iph->saddr) { |
| 713 | c->mfc_origin == iph->saddr) | 962 | found = true; |
| 714 | break; | 963 | break; |
| 964 | } | ||
| 715 | } | 965 | } |
| 716 | 966 | ||
| 717 | if (c == NULL) { | 967 | if (!found) { |
| 718 | /* | 968 | /* |
| 719 | * Create a new entry if allowable | 969 | * Create a new entry if allowable |
| 720 | */ | 970 | */ |
| 721 | 971 | ||
| 722 | if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || | 972 | if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || |
| 723 | (c = ipmr_cache_alloc_unres(net)) == NULL) { | 973 | (c = ipmr_cache_alloc_unres()) == NULL) { |
| 724 | spin_unlock_bh(&mfc_unres_lock); | 974 | spin_unlock_bh(&mfc_unres_lock); |
| 725 | 975 | ||
| 726 | kfree_skb(skb); | 976 | kfree_skb(skb); |
| @@ -737,7 +987,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) | |||
| 737 | /* | 987 | /* |
| 738 | * Reflect first query at mrouted. | 988 | * Reflect first query at mrouted. |
| 739 | */ | 989 | */ |
| 740 | err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); | 990 | err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); |
| 741 | if (err < 0) { | 991 | if (err < 0) { |
| 742 | /* If the report failed throw the cache entry | 992 | /* If the report failed throw the cache entry |
| 743 | out - Brad Parker | 993 | out - Brad Parker |
| @@ -749,11 +999,11 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) | |||
| 749 | return err; | 999 | return err; |
| 750 | } | 1000 | } |
| 751 | 1001 | ||
| 752 | atomic_inc(&net->ipv4.cache_resolve_queue_len); | 1002 | atomic_inc(&mrt->cache_resolve_queue_len); |
| 753 | c->next = mfc_unres_queue; | 1003 | list_add(&c->list, &mrt->mfc_unres_queue); |
| 754 | mfc_unres_queue = c; | ||
| 755 | 1004 | ||
| 756 | mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); | 1005 | if (atomic_read(&mrt->cache_resolve_queue_len) == 1) |
| 1006 | mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); | ||
| 757 | } | 1007 | } |
| 758 | 1008 | ||
| 759 | /* | 1009 | /* |
| @@ -775,19 +1025,18 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) | |||
| 775 | * MFC cache manipulation by user space mroute daemon | 1025 | * MFC cache manipulation by user space mroute daemon |
| 776 | */ | 1026 | */ |
| 777 | 1027 | ||
| 778 | static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) | 1028 | static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) |
| 779 | { | 1029 | { |
| 780 | int line; | 1030 | int line; |
| 781 | struct mfc_cache *c, **cp; | 1031 | struct mfc_cache *c, *next; |
| 782 | 1032 | ||
| 783 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 1033 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
| 784 | 1034 | ||
| 785 | for (cp = &net->ipv4.mfc_cache_array[line]; | 1035 | list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { |
| 786 | (c = *cp) != NULL; cp = &c->next) { | ||
| 787 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 1036 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
| 788 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { | 1037 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { |
| 789 | write_lock_bh(&mrt_lock); | 1038 | write_lock_bh(&mrt_lock); |
| 790 | *cp = c->next; | 1039 | list_del(&c->list); |
| 791 | write_unlock_bh(&mrt_lock); | 1040 | write_unlock_bh(&mrt_lock); |
| 792 | 1041 | ||
| 793 | ipmr_cache_free(c); | 1042 | ipmr_cache_free(c); |
| @@ -797,24 +1046,30 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) | |||
| 797 | return -ENOENT; | 1046 | return -ENOENT; |
| 798 | } | 1047 | } |
| 799 | 1048 | ||
| 800 | static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) | 1049 | static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, |
| 1050 | struct mfcctl *mfc, int mrtsock) | ||
| 801 | { | 1051 | { |
| 1052 | bool found = false; | ||
| 802 | int line; | 1053 | int line; |
| 803 | struct mfc_cache *uc, *c, **cp; | 1054 | struct mfc_cache *uc, *c; |
| 1055 | |||
| 1056 | if (mfc->mfcc_parent >= MAXVIFS) | ||
| 1057 | return -ENFILE; | ||
| 804 | 1058 | ||
| 805 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 1059 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
| 806 | 1060 | ||
| 807 | for (cp = &net->ipv4.mfc_cache_array[line]; | 1061 | list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { |
| 808 | (c = *cp) != NULL; cp = &c->next) { | ||
| 809 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 1062 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
| 810 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) | 1063 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { |
| 1064 | found = true; | ||
| 811 | break; | 1065 | break; |
| 1066 | } | ||
| 812 | } | 1067 | } |
| 813 | 1068 | ||
| 814 | if (c != NULL) { | 1069 | if (found) { |
| 815 | write_lock_bh(&mrt_lock); | 1070 | write_lock_bh(&mrt_lock); |
| 816 | c->mfc_parent = mfc->mfcc_parent; | 1071 | c->mfc_parent = mfc->mfcc_parent; |
| 817 | ipmr_update_thresholds(c, mfc->mfcc_ttls); | 1072 | ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); |
| 818 | if (!mrtsock) | 1073 | if (!mrtsock) |
| 819 | c->mfc_flags |= MFC_STATIC; | 1074 | c->mfc_flags |= MFC_STATIC; |
| 820 | write_unlock_bh(&mrt_lock); | 1075 | write_unlock_bh(&mrt_lock); |
| @@ -824,43 +1079,42 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) | |||
| 824 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) | 1079 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) |
| 825 | return -EINVAL; | 1080 | return -EINVAL; |
| 826 | 1081 | ||
| 827 | c = ipmr_cache_alloc(net); | 1082 | c = ipmr_cache_alloc(); |
| 828 | if (c == NULL) | 1083 | if (c == NULL) |
| 829 | return -ENOMEM; | 1084 | return -ENOMEM; |
| 830 | 1085 | ||
| 831 | c->mfc_origin = mfc->mfcc_origin.s_addr; | 1086 | c->mfc_origin = mfc->mfcc_origin.s_addr; |
| 832 | c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; | 1087 | c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; |
| 833 | c->mfc_parent = mfc->mfcc_parent; | 1088 | c->mfc_parent = mfc->mfcc_parent; |
| 834 | ipmr_update_thresholds(c, mfc->mfcc_ttls); | 1089 | ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); |
| 835 | if (!mrtsock) | 1090 | if (!mrtsock) |
| 836 | c->mfc_flags |= MFC_STATIC; | 1091 | c->mfc_flags |= MFC_STATIC; |
| 837 | 1092 | ||
| 838 | write_lock_bh(&mrt_lock); | 1093 | write_lock_bh(&mrt_lock); |
| 839 | c->next = net->ipv4.mfc_cache_array[line]; | 1094 | list_add(&c->list, &mrt->mfc_cache_array[line]); |
| 840 | net->ipv4.mfc_cache_array[line] = c; | ||
| 841 | write_unlock_bh(&mrt_lock); | 1095 | write_unlock_bh(&mrt_lock); |
| 842 | 1096 | ||
| 843 | /* | 1097 | /* |
| 844 | * Check to see if we resolved a queued list. If so we | 1098 | * Check to see if we resolved a queued list. If so we |
| 845 | * need to send on the frames and tidy up. | 1099 | * need to send on the frames and tidy up. |
| 846 | */ | 1100 | */ |
| 1101 | found = false; | ||
| 847 | spin_lock_bh(&mfc_unres_lock); | 1102 | spin_lock_bh(&mfc_unres_lock); |
| 848 | for (cp = &mfc_unres_queue; (uc=*cp) != NULL; | 1103 | list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { |
| 849 | cp = &uc->next) { | 1104 | if (uc->mfc_origin == c->mfc_origin && |
| 850 | if (net_eq(mfc_net(uc), net) && | ||
| 851 | uc->mfc_origin == c->mfc_origin && | ||
| 852 | uc->mfc_mcastgrp == c->mfc_mcastgrp) { | 1105 | uc->mfc_mcastgrp == c->mfc_mcastgrp) { |
| 853 | *cp = uc->next; | 1106 | list_del(&uc->list); |
| 854 | atomic_dec(&net->ipv4.cache_resolve_queue_len); | 1107 | atomic_dec(&mrt->cache_resolve_queue_len); |
| 1108 | found = true; | ||
| 855 | break; | 1109 | break; |
| 856 | } | 1110 | } |
| 857 | } | 1111 | } |
| 858 | if (mfc_unres_queue == NULL) | 1112 | if (list_empty(&mrt->mfc_unres_queue)) |
| 859 | del_timer(&ipmr_expire_timer); | 1113 | del_timer(&mrt->ipmr_expire_timer); |
| 860 | spin_unlock_bh(&mfc_unres_lock); | 1114 | spin_unlock_bh(&mfc_unres_lock); |
| 861 | 1115 | ||
| 862 | if (uc) { | 1116 | if (found) { |
| 863 | ipmr_cache_resolve(uc, c); | 1117 | ipmr_cache_resolve(net, mrt, uc, c); |
| 864 | ipmr_cache_free(uc); | 1118 | ipmr_cache_free(uc); |
| 865 | } | 1119 | } |
| 866 | return 0; | 1120 | return 0; |
| @@ -870,53 +1124,41 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) | |||
| 870 | * Close the multicast socket, and clear the vif tables etc | 1124 | * Close the multicast socket, and clear the vif tables etc |
| 871 | */ | 1125 | */ |
| 872 | 1126 | ||
| 873 | static void mroute_clean_tables(struct net *net) | 1127 | static void mroute_clean_tables(struct mr_table *mrt) |
| 874 | { | 1128 | { |
| 875 | int i; | 1129 | int i; |
| 876 | LIST_HEAD(list); | 1130 | LIST_HEAD(list); |
| 1131 | struct mfc_cache *c, *next; | ||
| 877 | 1132 | ||
| 878 | /* | 1133 | /* |
| 879 | * Shut down all active vif entries | 1134 | * Shut down all active vif entries |
| 880 | */ | 1135 | */ |
| 881 | for (i = 0; i < net->ipv4.maxvif; i++) { | 1136 | for (i = 0; i < mrt->maxvif; i++) { |
| 882 | if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) | 1137 | if (!(mrt->vif_table[i].flags&VIFF_STATIC)) |
| 883 | vif_delete(net, i, 0, &list); | 1138 | vif_delete(mrt, i, 0, &list); |
| 884 | } | 1139 | } |
| 885 | unregister_netdevice_many(&list); | 1140 | unregister_netdevice_many(&list); |
| 886 | 1141 | ||
| 887 | /* | 1142 | /* |
| 888 | * Wipe the cache | 1143 | * Wipe the cache |
| 889 | */ | 1144 | */ |
| 890 | for (i=0; i<MFC_LINES; i++) { | 1145 | for (i = 0; i < MFC_LINES; i++) { |
| 891 | struct mfc_cache *c, **cp; | 1146 | list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { |
| 892 | 1147 | if (c->mfc_flags&MFC_STATIC) | |
| 893 | cp = &net->ipv4.mfc_cache_array[i]; | ||
| 894 | while ((c = *cp) != NULL) { | ||
| 895 | if (c->mfc_flags&MFC_STATIC) { | ||
| 896 | cp = &c->next; | ||
| 897 | continue; | 1148 | continue; |
| 898 | } | ||
| 899 | write_lock_bh(&mrt_lock); | 1149 | write_lock_bh(&mrt_lock); |
| 900 | *cp = c->next; | 1150 | list_del(&c->list); |
| 901 | write_unlock_bh(&mrt_lock); | 1151 | write_unlock_bh(&mrt_lock); |
| 902 | 1152 | ||
| 903 | ipmr_cache_free(c); | 1153 | ipmr_cache_free(c); |
| 904 | } | 1154 | } |
| 905 | } | 1155 | } |
| 906 | 1156 | ||
| 907 | if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { | 1157 | if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { |
| 908 | struct mfc_cache *c, **cp; | ||
| 909 | |||
| 910 | spin_lock_bh(&mfc_unres_lock); | 1158 | spin_lock_bh(&mfc_unres_lock); |
| 911 | cp = &mfc_unres_queue; | 1159 | list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { |
| 912 | while ((c = *cp) != NULL) { | 1160 | list_del(&c->list); |
| 913 | if (!net_eq(mfc_net(c), net)) { | 1161 | ipmr_destroy_unres(mrt, c); |
| 914 | cp = &c->next; | ||
| 915 | continue; | ||
| 916 | } | ||
| 917 | *cp = c->next; | ||
| 918 | |||
| 919 | ipmr_destroy_unres(c); | ||
| 920 | } | 1162 | } |
| 921 | spin_unlock_bh(&mfc_unres_lock); | 1163 | spin_unlock_bh(&mfc_unres_lock); |
| 922 | } | 1164 | } |
| @@ -925,16 +1167,19 @@ static void mroute_clean_tables(struct net *net) | |||
| 925 | static void mrtsock_destruct(struct sock *sk) | 1167 | static void mrtsock_destruct(struct sock *sk) |
| 926 | { | 1168 | { |
| 927 | struct net *net = sock_net(sk); | 1169 | struct net *net = sock_net(sk); |
| 1170 | struct mr_table *mrt; | ||
| 928 | 1171 | ||
| 929 | rtnl_lock(); | 1172 | rtnl_lock(); |
| 930 | if (sk == net->ipv4.mroute_sk) { | 1173 | ipmr_for_each_table(mrt, net) { |
| 931 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; | 1174 | if (sk == mrt->mroute_sk) { |
| 1175 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; | ||
| 932 | 1176 | ||
| 933 | write_lock_bh(&mrt_lock); | 1177 | write_lock_bh(&mrt_lock); |
| 934 | net->ipv4.mroute_sk = NULL; | 1178 | mrt->mroute_sk = NULL; |
| 935 | write_unlock_bh(&mrt_lock); | 1179 | write_unlock_bh(&mrt_lock); |
| 936 | 1180 | ||
| 937 | mroute_clean_tables(net); | 1181 | mroute_clean_tables(mrt); |
| 1182 | } | ||
| 938 | } | 1183 | } |
| 939 | rtnl_unlock(); | 1184 | rtnl_unlock(); |
| 940 | } | 1185 | } |
| @@ -952,9 +1197,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 952 | struct vifctl vif; | 1197 | struct vifctl vif; |
| 953 | struct mfcctl mfc; | 1198 | struct mfcctl mfc; |
| 954 | struct net *net = sock_net(sk); | 1199 | struct net *net = sock_net(sk); |
| 1200 | struct mr_table *mrt; | ||
| 1201 | |||
| 1202 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | ||
| 1203 | if (mrt == NULL) | ||
| 1204 | return -ENOENT; | ||
| 955 | 1205 | ||
| 956 | if (optname != MRT_INIT) { | 1206 | if (optname != MRT_INIT) { |
| 957 | if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) | 1207 | if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) |
| 958 | return -EACCES; | 1208 | return -EACCES; |
| 959 | } | 1209 | } |
| 960 | 1210 | ||
| @@ -967,7 +1217,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 967 | return -ENOPROTOOPT; | 1217 | return -ENOPROTOOPT; |
| 968 | 1218 | ||
| 969 | rtnl_lock(); | 1219 | rtnl_lock(); |
| 970 | if (net->ipv4.mroute_sk) { | 1220 | if (mrt->mroute_sk) { |
| 971 | rtnl_unlock(); | 1221 | rtnl_unlock(); |
| 972 | return -EADDRINUSE; | 1222 | return -EADDRINUSE; |
| 973 | } | 1223 | } |
| @@ -975,7 +1225,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 975 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | 1225 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
| 976 | if (ret == 0) { | 1226 | if (ret == 0) { |
| 977 | write_lock_bh(&mrt_lock); | 1227 | write_lock_bh(&mrt_lock); |
| 978 | net->ipv4.mroute_sk = sk; | 1228 | mrt->mroute_sk = sk; |
| 979 | write_unlock_bh(&mrt_lock); | 1229 | write_unlock_bh(&mrt_lock); |
| 980 | 1230 | ||
| 981 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; | 1231 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; |
| @@ -983,7 +1233,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 983 | rtnl_unlock(); | 1233 | rtnl_unlock(); |
| 984 | return ret; | 1234 | return ret; |
| 985 | case MRT_DONE: | 1235 | case MRT_DONE: |
| 986 | if (sk != net->ipv4.mroute_sk) | 1236 | if (sk != mrt->mroute_sk) |
| 987 | return -EACCES; | 1237 | return -EACCES; |
| 988 | return ip_ra_control(sk, 0, NULL); | 1238 | return ip_ra_control(sk, 0, NULL); |
| 989 | case MRT_ADD_VIF: | 1239 | case MRT_ADD_VIF: |
| @@ -996,9 +1246,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 996 | return -ENFILE; | 1246 | return -ENFILE; |
| 997 | rtnl_lock(); | 1247 | rtnl_lock(); |
| 998 | if (optname == MRT_ADD_VIF) { | 1248 | if (optname == MRT_ADD_VIF) { |
| 999 | ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); | 1249 | ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); |
| 1000 | } else { | 1250 | } else { |
| 1001 | ret = vif_delete(net, vif.vifc_vifi, 0, NULL); | 1251 | ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); |
| 1002 | } | 1252 | } |
| 1003 | rtnl_unlock(); | 1253 | rtnl_unlock(); |
| 1004 | return ret; | 1254 | return ret; |
| @@ -1015,9 +1265,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 1015 | return -EFAULT; | 1265 | return -EFAULT; |
| 1016 | rtnl_lock(); | 1266 | rtnl_lock(); |
| 1017 | if (optname == MRT_DEL_MFC) | 1267 | if (optname == MRT_DEL_MFC) |
| 1018 | ret = ipmr_mfc_delete(net, &mfc); | 1268 | ret = ipmr_mfc_delete(mrt, &mfc); |
| 1019 | else | 1269 | else |
| 1020 | ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); | 1270 | ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); |
| 1021 | rtnl_unlock(); | 1271 | rtnl_unlock(); |
| 1022 | return ret; | 1272 | return ret; |
| 1023 | /* | 1273 | /* |
| @@ -1028,7 +1278,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 1028 | int v; | 1278 | int v; |
| 1029 | if (get_user(v,(int __user *)optval)) | 1279 | if (get_user(v,(int __user *)optval)) |
| 1030 | return -EFAULT; | 1280 | return -EFAULT; |
| 1031 | net->ipv4.mroute_do_assert = (v) ? 1 : 0; | 1281 | mrt->mroute_do_assert = (v) ? 1 : 0; |
| 1032 | return 0; | 1282 | return 0; |
| 1033 | } | 1283 | } |
| 1034 | #ifdef CONFIG_IP_PIMSM | 1284 | #ifdef CONFIG_IP_PIMSM |
| @@ -1042,14 +1292,35 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 1042 | 1292 | ||
| 1043 | rtnl_lock(); | 1293 | rtnl_lock(); |
| 1044 | ret = 0; | 1294 | ret = 0; |
| 1045 | if (v != net->ipv4.mroute_do_pim) { | 1295 | if (v != mrt->mroute_do_pim) { |
| 1046 | net->ipv4.mroute_do_pim = v; | 1296 | mrt->mroute_do_pim = v; |
| 1047 | net->ipv4.mroute_do_assert = v; | 1297 | mrt->mroute_do_assert = v; |
| 1048 | } | 1298 | } |
| 1049 | rtnl_unlock(); | 1299 | rtnl_unlock(); |
| 1050 | return ret; | 1300 | return ret; |
| 1051 | } | 1301 | } |
| 1052 | #endif | 1302 | #endif |
| 1303 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | ||
| 1304 | case MRT_TABLE: | ||
| 1305 | { | ||
| 1306 | u32 v; | ||
| 1307 | |||
| 1308 | if (optlen != sizeof(u32)) | ||
| 1309 | return -EINVAL; | ||
| 1310 | if (get_user(v, (u32 __user *)optval)) | ||
| 1311 | return -EFAULT; | ||
| 1312 | if (sk == mrt->mroute_sk) | ||
| 1313 | return -EBUSY; | ||
| 1314 | |||
| 1315 | rtnl_lock(); | ||
| 1316 | ret = 0; | ||
| 1317 | if (!ipmr_new_table(net, v)) | ||
| 1318 | ret = -ENOMEM; | ||
| 1319 | raw_sk(sk)->ipmr_table = v; | ||
| 1320 | rtnl_unlock(); | ||
| 1321 | return ret; | ||
| 1322 | } | ||
| 1323 | #endif | ||
| 1053 | /* | 1324 | /* |
| 1054 | * Spurious command, or MRT_VERSION which you cannot | 1325 | * Spurious command, or MRT_VERSION which you cannot |
| 1055 | * set. | 1326 | * set. |
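
The MRT_TABLE case added above lets a routing daemon pick its multicast table before issuing MRT_INIT; once the socket has become the active mroute_sk, the option fails with -EBUSY. A minimal userspace sketch, assuming CONFIG_IP_MROUTE_MULTIPLE_TABLES is enabled and the installed <linux/mroute.h> exports MRT_TABLE; the table id 100 is an arbitrary example:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <linux/mroute.h>

    int main(void)
    {
        int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
        uint32_t table = 100;   /* hypothetical table id */
        int one = 1;

        if (s < 0) {
            perror("socket");
            return 1;
        }
        /* Must precede MRT_INIT: the kernel rejects MRT_TABLE with
         * -EBUSY once this socket is the active mroute socket. */
        if (setsockopt(s, IPPROTO_IP, MRT_TABLE, &table, sizeof(table)) < 0)
            perror("MRT_TABLE");
        if (setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
            perror("MRT_INIT");
        return 0;
    }
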
| @@ -1068,6 +1339,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int | |||
| 1068 | int olr; | 1339 | int olr; |
| 1069 | int val; | 1340 | int val; |
| 1070 | struct net *net = sock_net(sk); | 1341 | struct net *net = sock_net(sk); |
| 1342 | struct mr_table *mrt; | ||
| 1343 | |||
| 1344 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | ||
| 1345 | if (mrt == NULL) | ||
| 1346 | return -ENOENT; | ||
| 1071 | 1347 | ||
| 1072 | if (optname != MRT_VERSION && | 1348 | if (optname != MRT_VERSION && |
| 1073 | #ifdef CONFIG_IP_PIMSM | 1349 | #ifdef CONFIG_IP_PIMSM |
| @@ -1089,10 +1365,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int | |||
| 1089 | val = 0x0305; | 1365 | val = 0x0305; |
| 1090 | #ifdef CONFIG_IP_PIMSM | 1366 | #ifdef CONFIG_IP_PIMSM |
| 1091 | else if (optname == MRT_PIM) | 1367 | else if (optname == MRT_PIM) |
| 1092 | val = net->ipv4.mroute_do_pim; | 1368 | val = mrt->mroute_do_pim; |
| 1093 | #endif | 1369 | #endif |
| 1094 | else | 1370 | else |
| 1095 | val = net->ipv4.mroute_do_assert; | 1371 | val = mrt->mroute_do_assert; |
| 1096 | if (copy_to_user(optval, &val, olr)) | 1372 | if (copy_to_user(optval, &val, olr)) |
| 1097 | return -EFAULT; | 1373 | return -EFAULT; |
| 1098 | return 0; | 1374 | return 0; |
| @@ -1109,16 +1385,21 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
| 1109 | struct vif_device *vif; | 1385 | struct vif_device *vif; |
| 1110 | struct mfc_cache *c; | 1386 | struct mfc_cache *c; |
| 1111 | struct net *net = sock_net(sk); | 1387 | struct net *net = sock_net(sk); |
| 1388 | struct mr_table *mrt; | ||
| 1389 | |||
| 1390 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | ||
| 1391 | if (mrt == NULL) | ||
| 1392 | return -ENOENT; | ||
| 1112 | 1393 | ||
| 1113 | switch (cmd) { | 1394 | switch (cmd) { |
| 1114 | case SIOCGETVIFCNT: | 1395 | case SIOCGETVIFCNT: |
| 1115 | if (copy_from_user(&vr, arg, sizeof(vr))) | 1396 | if (copy_from_user(&vr, arg, sizeof(vr))) |
| 1116 | return -EFAULT; | 1397 | return -EFAULT; |
| 1117 | if (vr.vifi >= net->ipv4.maxvif) | 1398 | if (vr.vifi >= mrt->maxvif) |
| 1118 | return -EINVAL; | 1399 | return -EINVAL; |
| 1119 | read_lock(&mrt_lock); | 1400 | read_lock(&mrt_lock); |
| 1120 | vif = &net->ipv4.vif_table[vr.vifi]; | 1401 | vif = &mrt->vif_table[vr.vifi]; |
| 1121 | if (VIF_EXISTS(net, vr.vifi)) { | 1402 | if (VIF_EXISTS(mrt, vr.vifi)) { |
| 1122 | vr.icount = vif->pkt_in; | 1403 | vr.icount = vif->pkt_in; |
| 1123 | vr.ocount = vif->pkt_out; | 1404 | vr.ocount = vif->pkt_out; |
| 1124 | vr.ibytes = vif->bytes_in; | 1405 | vr.ibytes = vif->bytes_in; |
| @@ -1136,7 +1417,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
| 1136 | return -EFAULT; | 1417 | return -EFAULT; |
| 1137 | 1418 | ||
| 1138 | read_lock(&mrt_lock); | 1419 | read_lock(&mrt_lock); |
| 1139 | c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); | 1420 | c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); |
| 1140 | if (c) { | 1421 | if (c) { |
| 1141 | sr.pktcnt = c->mfc_un.res.pkt; | 1422 | sr.pktcnt = c->mfc_un.res.pkt; |
| 1142 | sr.bytecnt = c->mfc_un.res.bytes; | 1423 | sr.bytecnt = c->mfc_un.res.bytes; |
| @@ -1159,19 +1440,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
| 1159 | { | 1440 | { |
| 1160 | struct net_device *dev = ptr; | 1441 | struct net_device *dev = ptr; |
| 1161 | struct net *net = dev_net(dev); | 1442 | struct net *net = dev_net(dev); |
| 1443 | struct mr_table *mrt; | ||
| 1162 | struct vif_device *v; | 1444 | struct vif_device *v; |
| 1163 | int ct; | 1445 | int ct; |
| 1164 | LIST_HEAD(list); | 1446 | LIST_HEAD(list); |
| 1165 | 1447 | ||
| 1166 | if (!net_eq(dev_net(dev), net)) | ||
| 1167 | return NOTIFY_DONE; | ||
| 1168 | |||
| 1169 | if (event != NETDEV_UNREGISTER) | 1448 | if (event != NETDEV_UNREGISTER) |
| 1170 | return NOTIFY_DONE; | 1449 | return NOTIFY_DONE; |
| 1171 | v = &net->ipv4.vif_table[0]; | 1450 | |
| 1172 | for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { | 1451 | ipmr_for_each_table(mrt, net) { |
| 1173 | if (v->dev == dev) | 1452 | v = &mrt->vif_table[0]; |
| 1174 | vif_delete(net, ct, 1, &list); | 1453 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { |
| 1454 | if (v->dev == dev) | ||
| 1455 | vif_delete(mrt, ct, 1, &list); | ||
| 1456 | } | ||
| 1175 | } | 1457 | } |
| 1176 | unregister_netdevice_many(&list); | 1458 | unregister_netdevice_many(&list); |
| 1177 | return NOTIFY_DONE; | 1459 | return NOTIFY_DONE; |
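
Two things happen in this hunk: the redundant net_eq(dev_net(dev), net) test is dropped (net was derived from dev_net(dev) a few lines earlier, so the comparison was always true), and the VIF walk is repeated per table via ipmr_for_each_table(), since each mr_table now owns its own VIF array and expire timer. A sketch of the per-table state and iterator introduced by this series, abridged from the patch and best read as illustrative:

    struct mr_table {
        struct list_head    list;               /* linked on net->ipv4.mr_tables */
        u32                 id;                 /* RT_TABLE_* identifier */
        struct sock         *mroute_sk;
        struct timer_list   ipmr_expire_timer;
        struct list_head    mfc_unres_queue;
        struct list_head    mfc_cache_array[MFC_LINES];
        struct vif_device   vif_table[MAXVIFS];
        int                 maxvif;
        int                 mroute_do_assert;
        int                 mroute_do_pim;
    #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
        int                 mroute_reg_vif_num;
    #endif
    };

    #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    #define ipmr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
    #else
    #define ipmr_for_each_table(mrt, net) \
        for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
    #endif
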
| @@ -1230,11 +1512,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) | |||
| 1230 | * Processing handlers for ipmr_forward | 1512 | * Processing handlers for ipmr_forward |
| 1231 | */ | 1513 | */ |
| 1232 | 1514 | ||
| 1233 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | 1515 | static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, |
| 1516 | struct sk_buff *skb, struct mfc_cache *c, int vifi) | ||
| 1234 | { | 1517 | { |
| 1235 | struct net *net = mfc_net(c); | ||
| 1236 | const struct iphdr *iph = ip_hdr(skb); | 1518 | const struct iphdr *iph = ip_hdr(skb); |
| 1237 | struct vif_device *vif = &net->ipv4.vif_table[vifi]; | 1519 | struct vif_device *vif = &mrt->vif_table[vifi]; |
| 1238 | struct net_device *dev; | 1520 | struct net_device *dev; |
| 1239 | struct rtable *rt; | 1521 | struct rtable *rt; |
| 1240 | int encap = 0; | 1522 | int encap = 0; |
| @@ -1248,7 +1530,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
| 1248 | vif->bytes_out += skb->len; | 1530 | vif->bytes_out += skb->len; |
| 1249 | vif->dev->stats.tx_bytes += skb->len; | 1531 | vif->dev->stats.tx_bytes += skb->len; |
| 1250 | vif->dev->stats.tx_packets++; | 1532 | vif->dev->stats.tx_packets++; |
| 1251 | ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); | 1533 | ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); |
| 1252 | goto out_free; | 1534 | goto out_free; |
| 1253 | } | 1535 | } |
| 1254 | #endif | 1536 | #endif |
| @@ -1273,9 +1555,9 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
| 1273 | goto out_free; | 1555 | goto out_free; |
| 1274 | } | 1556 | } |
| 1275 | 1557 | ||
| 1276 | dev = rt->u.dst.dev; | 1558 | dev = rt->dst.dev; |
| 1277 | 1559 | ||
| 1278 | if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { | 1560 | if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { |
| 1279 | /* Do not fragment multicasts. Alas, IPv4 does not | 1561 | /* Do not fragment multicasts. Alas, IPv4 does not |
| 1280 | allow to send ICMP, so that packets will disappear | 1562 | allow to send ICMP, so that packets will disappear |
| 1281 | to blackhole. | 1563 | to blackhole. |
| @@ -1286,7 +1568,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
| 1286 | goto out_free; | 1568 | goto out_free; |
| 1287 | } | 1569 | } |
| 1288 | 1570 | ||
| 1289 | encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; | 1571 | encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; |
| 1290 | 1572 | ||
| 1291 | if (skb_cow(skb, encap)) { | 1573 | if (skb_cow(skb, encap)) { |
| 1292 | ip_rt_put(rt); | 1574 | ip_rt_put(rt); |
| @@ -1297,7 +1579,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
| 1297 | vif->bytes_out += skb->len; | 1579 | vif->bytes_out += skb->len; |
| 1298 | 1580 | ||
| 1299 | skb_dst_drop(skb); | 1581 | skb_dst_drop(skb); |
| 1300 | skb_dst_set(skb, &rt->u.dst); | 1582 | skb_dst_set(skb, &rt->dst); |
| 1301 | ip_decrease_ttl(ip_hdr(skb)); | 1583 | ip_decrease_ttl(ip_hdr(skb)); |
| 1302 | 1584 | ||
| 1303 | /* FIXME: forward and output firewalls used to be called here. | 1585 | /* FIXME: forward and output firewalls used to be called here. |
| @@ -1322,21 +1604,20 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
| 1322 | * not mrouter) cannot join to more than one interface - it will | 1604 | * not mrouter) cannot join to more than one interface - it will |
| 1323 | * result in receiving multiple packets. | 1605 | * result in receiving multiple packets. |
| 1324 | */ | 1606 | */ |
| 1325 | NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev, | 1607 | NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, |
| 1326 | ipmr_forward_finish); | 1608 | ipmr_forward_finish); |
| 1327 | return; | 1609 | return; |
| 1328 | 1610 | ||
| 1329 | out_free: | 1611 | out_free: |
| 1330 | kfree_skb(skb); | 1612 | kfree_skb(skb); |
| 1331 | return; | ||
| 1332 | } | 1613 | } |
| 1333 | 1614 | ||
| 1334 | static int ipmr_find_vif(struct net_device *dev) | 1615 | static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) |
| 1335 | { | 1616 | { |
| 1336 | struct net *net = dev_net(dev); | ||
| 1337 | int ct; | 1617 | int ct; |
| 1338 | for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { | 1618 | |
| 1339 | if (net->ipv4.vif_table[ct].dev == dev) | 1619 | for (ct = mrt->maxvif-1; ct >= 0; ct--) { |
| 1620 | if (mrt->vif_table[ct].dev == dev) | ||
| 1340 | break; | 1621 | break; |
| 1341 | } | 1622 | } |
| 1342 | return ct; | 1623 | return ct; |
| @@ -1344,11 +1625,12 @@ static int ipmr_find_vif(struct net_device *dev) | |||
| 1344 | 1625 | ||
| 1345 | /* "local" means that we should preserve one skb (for local delivery) */ | 1626 | /* "local" means that we should preserve one skb (for local delivery) */ |
| 1346 | 1627 | ||
| 1347 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) | 1628 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, |
| 1629 | struct sk_buff *skb, struct mfc_cache *cache, | ||
| 1630 | int local) | ||
| 1348 | { | 1631 | { |
| 1349 | int psend = -1; | 1632 | int psend = -1; |
| 1350 | int vif, ct; | 1633 | int vif, ct; |
| 1351 | struct net *net = mfc_net(cache); | ||
| 1352 | 1634 | ||
| 1353 | vif = cache->mfc_parent; | 1635 | vif = cache->mfc_parent; |
| 1354 | cache->mfc_un.res.pkt++; | 1636 | cache->mfc_un.res.pkt++; |
| @@ -1357,7 +1639,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
| 1357 | /* | 1639 | /* |
| 1358 | * Wrong interface: drop packet and (maybe) send PIM assert. | 1640 | * Wrong interface: drop packet and (maybe) send PIM assert. |
| 1359 | */ | 1641 | */ |
| 1360 | if (net->ipv4.vif_table[vif].dev != skb->dev) { | 1642 | if (mrt->vif_table[vif].dev != skb->dev) { |
| 1361 | int true_vifi; | 1643 | int true_vifi; |
| 1362 | 1644 | ||
| 1363 | if (skb_rtable(skb)->fl.iif == 0) { | 1645 | if (skb_rtable(skb)->fl.iif == 0) { |
| @@ -1376,26 +1658,26 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
| 1376 | } | 1658 | } |
| 1377 | 1659 | ||
| 1378 | cache->mfc_un.res.wrong_if++; | 1660 | cache->mfc_un.res.wrong_if++; |
| 1379 | true_vifi = ipmr_find_vif(skb->dev); | 1661 | true_vifi = ipmr_find_vif(mrt, skb->dev); |
| 1380 | 1662 | ||
| 1381 | if (true_vifi >= 0 && net->ipv4.mroute_do_assert && | 1663 | if (true_vifi >= 0 && mrt->mroute_do_assert && |
| 1382 | /* pimsm uses asserts, when switching from RPT to SPT, | 1664 | /* pimsm uses asserts, when switching from RPT to SPT, |
| 1383 | so that we cannot check that packet arrived on an oif. | 1665 | so that we cannot check that packet arrived on an oif. |
| 1384 | It is bad, but otherwise we would need to move pretty | 1666 | It is bad, but otherwise we would need to move pretty |
| 1385 | large chunk of pimd to kernel. Ough... --ANK | 1667 | large chunk of pimd to kernel. Ough... --ANK |
| 1386 | */ | 1668 | */ |
| 1387 | (net->ipv4.mroute_do_pim || | 1669 | (mrt->mroute_do_pim || |
| 1388 | cache->mfc_un.res.ttls[true_vifi] < 255) && | 1670 | cache->mfc_un.res.ttls[true_vifi] < 255) && |
| 1389 | time_after(jiffies, | 1671 | time_after(jiffies, |
| 1390 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { | 1672 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { |
| 1391 | cache->mfc_un.res.last_assert = jiffies; | 1673 | cache->mfc_un.res.last_assert = jiffies; |
| 1392 | ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); | 1674 | ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); |
| 1393 | } | 1675 | } |
| 1394 | goto dont_forward; | 1676 | goto dont_forward; |
| 1395 | } | 1677 | } |
| 1396 | 1678 | ||
| 1397 | net->ipv4.vif_table[vif].pkt_in++; | 1679 | mrt->vif_table[vif].pkt_in++; |
| 1398 | net->ipv4.vif_table[vif].bytes_in += skb->len; | 1680 | mrt->vif_table[vif].bytes_in += skb->len; |
| 1399 | 1681 | ||
| 1400 | /* | 1682 | /* |
| 1401 | * Forward the frame | 1683 | * Forward the frame |
| @@ -1405,7 +1687,8 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
| 1405 | if (psend != -1) { | 1687 | if (psend != -1) { |
| 1406 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1688 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
| 1407 | if (skb2) | 1689 | if (skb2) |
| 1408 | ipmr_queue_xmit(skb2, cache, psend); | 1690 | ipmr_queue_xmit(net, mrt, skb2, cache, |
| 1691 | psend); | ||
| 1409 | } | 1692 | } |
| 1410 | psend = ct; | 1693 | psend = ct; |
| 1411 | } | 1694 | } |
| @@ -1414,9 +1697,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
| 1414 | if (local) { | 1697 | if (local) { |
| 1415 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1698 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
| 1416 | if (skb2) | 1699 | if (skb2) |
| 1417 | ipmr_queue_xmit(skb2, cache, psend); | 1700 | ipmr_queue_xmit(net, mrt, skb2, cache, psend); |
| 1418 | } else { | 1701 | } else { |
| 1419 | ipmr_queue_xmit(skb, cache, psend); | 1702 | ipmr_queue_xmit(net, mrt, skb, cache, psend); |
| 1420 | return 0; | 1703 | return 0; |
| 1421 | } | 1704 | } |
| 1422 | } | 1705 | } |
| @@ -1437,6 +1720,8 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1437 | struct mfc_cache *cache; | 1720 | struct mfc_cache *cache; |
| 1438 | struct net *net = dev_net(skb->dev); | 1721 | struct net *net = dev_net(skb->dev); |
| 1439 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; | 1722 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; |
| 1723 | struct mr_table *mrt; | ||
| 1724 | int err; | ||
| 1440 | 1725 | ||
| 1441 | /* Packet is looped back after forward, it should not be | 1726 | /* Packet is looped back after forward, it should not be |
| 1442 | forwarded second time, but still can be delivered locally. | 1727 | forwarded second time, but still can be delivered locally. |
| @@ -1444,6 +1729,12 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1444 | if (IPCB(skb)->flags&IPSKB_FORWARDED) | 1729 | if (IPCB(skb)->flags&IPSKB_FORWARDED) |
| 1445 | goto dont_forward; | 1730 | goto dont_forward; |
| 1446 | 1731 | ||
| 1732 | err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); | ||
| 1733 | if (err < 0) { | ||
| 1734 | kfree_skb(skb); | ||
| 1735 | return err; | ||
| 1736 | } | ||
| 1737 | |||
| 1447 | if (!local) { | 1738 | if (!local) { |
| 1448 | if (IPCB(skb)->opt.router_alert) { | 1739 | if (IPCB(skb)->opt.router_alert) { |
| 1449 | if (ip_call_ra_chain(skb)) | 1740 | if (ip_call_ra_chain(skb)) |
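
ipmr_fib_lookup() maps the packet's flow to an mr_table. With multiple tables enabled it consults the FIB-rules framework (so interface and fwmark rules can steer packets, per the new Kconfig help text); without it, it degenerates to handing back the single default table, roughly:

    #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
                               struct mr_table **mrt)
    {
        *mrt = net->ipv4.mrt;   /* the lone table created at net init */
        return 0;
    }
    #endif
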
| @@ -1456,9 +1747,9 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1456 | that we can forward NO IGMP messages. | 1747 | that we can forward NO IGMP messages. |
| 1457 | */ | 1748 | */ |
| 1458 | read_lock(&mrt_lock); | 1749 | read_lock(&mrt_lock); |
| 1459 | if (net->ipv4.mroute_sk) { | 1750 | if (mrt->mroute_sk) { |
| 1460 | nf_reset(skb); | 1751 | nf_reset(skb); |
| 1461 | raw_rcv(net->ipv4.mroute_sk, skb); | 1752 | raw_rcv(mrt->mroute_sk, skb); |
| 1462 | read_unlock(&mrt_lock); | 1753 | read_unlock(&mrt_lock); |
| 1463 | return 0; | 1754 | return 0; |
| 1464 | } | 1755 | } |
| @@ -1467,7 +1758,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1467 | } | 1758 | } |
| 1468 | 1759 | ||
| 1469 | read_lock(&mrt_lock); | 1760 | read_lock(&mrt_lock); |
| 1470 | cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); | 1761 | cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); |
| 1471 | 1762 | ||
| 1472 | /* | 1763 | /* |
| 1473 | * No usable cache entry | 1764 | * No usable cache entry |
| @@ -1485,19 +1776,19 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1485 | skb = skb2; | 1776 | skb = skb2; |
| 1486 | } | 1777 | } |
| 1487 | 1778 | ||
| 1488 | vif = ipmr_find_vif(skb->dev); | 1779 | vif = ipmr_find_vif(mrt, skb->dev); |
| 1489 | if (vif >= 0) { | 1780 | if (vif >= 0) { |
| 1490 | int err = ipmr_cache_unresolved(net, vif, skb); | 1781 | int err2 = ipmr_cache_unresolved(mrt, vif, skb); |
| 1491 | read_unlock(&mrt_lock); | 1782 | read_unlock(&mrt_lock); |
| 1492 | 1783 | ||
| 1493 | return err; | 1784 | return err2; |
| 1494 | } | 1785 | } |
| 1495 | read_unlock(&mrt_lock); | 1786 | read_unlock(&mrt_lock); |
| 1496 | kfree_skb(skb); | 1787 | kfree_skb(skb); |
| 1497 | return -ENODEV; | 1788 | return -ENODEV; |
| 1498 | } | 1789 | } |
| 1499 | 1790 | ||
| 1500 | ip_mr_forward(skb, cache, local); | 1791 | ip_mr_forward(net, mrt, skb, cache, local); |
| 1501 | 1792 | ||
| 1502 | read_unlock(&mrt_lock); | 1793 | read_unlock(&mrt_lock); |
| 1503 | 1794 | ||
| @@ -1514,11 +1805,11 @@ dont_forward: | |||
| 1514 | } | 1805 | } |
| 1515 | 1806 | ||
| 1516 | #ifdef CONFIG_IP_PIMSM | 1807 | #ifdef CONFIG_IP_PIMSM |
| 1517 | static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) | 1808 | static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, |
| 1809 | unsigned int pimlen) | ||
| 1518 | { | 1810 | { |
| 1519 | struct net_device *reg_dev = NULL; | 1811 | struct net_device *reg_dev = NULL; |
| 1520 | struct iphdr *encap; | 1812 | struct iphdr *encap; |
| 1521 | struct net *net = dev_net(skb->dev); | ||
| 1522 | 1813 | ||
| 1523 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); | 1814 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); |
| 1524 | /* | 1815 | /* |
| @@ -1533,8 +1824,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) | |||
| 1533 | return 1; | 1824 | return 1; |
| 1534 | 1825 | ||
| 1535 | read_lock(&mrt_lock); | 1826 | read_lock(&mrt_lock); |
| 1536 | if (net->ipv4.mroute_reg_vif_num >= 0) | 1827 | if (mrt->mroute_reg_vif_num >= 0) |
| 1537 | reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; | 1828 | reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; |
| 1538 | if (reg_dev) | 1829 | if (reg_dev) |
| 1539 | dev_hold(reg_dev); | 1830 | dev_hold(reg_dev); |
| 1540 | read_unlock(&mrt_lock); | 1831 | read_unlock(&mrt_lock); |
| @@ -1545,14 +1836,12 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) | |||
| 1545 | skb->mac_header = skb->network_header; | 1836 | skb->mac_header = skb->network_header; |
| 1546 | skb_pull(skb, (u8*)encap - skb->data); | 1837 | skb_pull(skb, (u8*)encap - skb->data); |
| 1547 | skb_reset_network_header(skb); | 1838 | skb_reset_network_header(skb); |
| 1548 | skb->dev = reg_dev; | ||
| 1549 | skb->protocol = htons(ETH_P_IP); | 1839 | skb->protocol = htons(ETH_P_IP); |
| 1550 | skb->ip_summed = 0; | 1840 | skb->ip_summed = 0; |
| 1551 | skb->pkt_type = PACKET_HOST; | 1841 | skb->pkt_type = PACKET_HOST; |
| 1552 | skb_dst_drop(skb); | 1842 | |
| 1553 | reg_dev->stats.rx_bytes += skb->len; | 1843 | skb_tunnel_rx(skb, reg_dev); |
| 1554 | reg_dev->stats.rx_packets++; | 1844 | |
| 1555 | nf_reset(skb); | ||
| 1556 | netif_rx(skb); | 1845 | netif_rx(skb); |
| 1557 | dev_put(reg_dev); | 1846 | dev_put(reg_dev); |
| 1558 | 1847 | ||
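
The open-coded receive fixup (device assignment, rx stats, dst drop, nf_reset) collapses into the then-new skb_tunnel_rx() helper. Its shape in <net/dst.h> at the time was approximately:

    static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
    {
        skb->dev = dev;
        /* stats updated as in the removed lines */
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += skb->len;
        skb_dst_drop(skb);
        nf_reset(skb);
    }
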
| @@ -1569,17 +1858,21 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
| 1569 | { | 1858 | { |
| 1570 | struct igmphdr *pim; | 1859 | struct igmphdr *pim; |
| 1571 | struct net *net = dev_net(skb->dev); | 1860 | struct net *net = dev_net(skb->dev); |
| 1861 | struct mr_table *mrt; | ||
| 1572 | 1862 | ||
| 1573 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) | 1863 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) |
| 1574 | goto drop; | 1864 | goto drop; |
| 1575 | 1865 | ||
| 1576 | pim = igmp_hdr(skb); | 1866 | pim = igmp_hdr(skb); |
| 1577 | 1867 | ||
| 1578 | if (!net->ipv4.mroute_do_pim || | 1868 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) |
| 1869 | goto drop; | ||
| 1870 | |||
| 1871 | if (!mrt->mroute_do_pim || | ||
| 1579 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1872 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
| 1580 | goto drop; | 1873 | goto drop; |
| 1581 | 1874 | ||
| 1582 | if (__pim_rcv(skb, sizeof(*pim))) { | 1875 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { |
| 1583 | drop: | 1876 | drop: |
| 1584 | kfree_skb(skb); | 1877 | kfree_skb(skb); |
| 1585 | } | 1878 | } |
| @@ -1591,6 +1884,8 @@ drop: | |||
| 1591 | static int pim_rcv(struct sk_buff * skb) | 1884 | static int pim_rcv(struct sk_buff * skb) |
| 1592 | { | 1885 | { |
| 1593 | struct pimreghdr *pim; | 1886 | struct pimreghdr *pim; |
| 1887 | struct net *net = dev_net(skb->dev); | ||
| 1888 | struct mr_table *mrt; | ||
| 1594 | 1889 | ||
| 1595 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) | 1890 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) |
| 1596 | goto drop; | 1891 | goto drop; |
| @@ -1602,7 +1897,10 @@ static int pim_rcv(struct sk_buff * skb) | |||
| 1602 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1897 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
| 1603 | goto drop; | 1898 | goto drop; |
| 1604 | 1899 | ||
| 1605 | if (__pim_rcv(skb, sizeof(*pim))) { | 1900 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) |
| 1901 | goto drop; | ||
| 1902 | |||
| 1903 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { | ||
| 1606 | drop: | 1904 | drop: |
| 1607 | kfree_skb(skb); | 1905 | kfree_skb(skb); |
| 1608 | } | 1906 | } |
| @@ -1610,29 +1908,31 @@ drop: | |||
| 1610 | } | 1908 | } |
| 1611 | #endif | 1909 | #endif |
| 1612 | 1910 | ||
| 1613 | static int | 1911 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
| 1614 | ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | 1912 | struct mfc_cache *c, struct rtmsg *rtm) |
| 1615 | { | 1913 | { |
| 1616 | int ct; | 1914 | int ct; |
| 1617 | struct rtnexthop *nhp; | 1915 | struct rtnexthop *nhp; |
| 1618 | struct net *net = mfc_net(c); | ||
| 1619 | struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev; | ||
| 1620 | u8 *b = skb_tail_pointer(skb); | 1916 | u8 *b = skb_tail_pointer(skb); |
| 1621 | struct rtattr *mp_head; | 1917 | struct rtattr *mp_head; |
| 1622 | 1918 | ||
| 1623 | if (dev) | 1919 | /* If cache is unresolved, don't try to parse IIF and OIF */ |
| 1624 | RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); | 1920 | if (c->mfc_parent >= MAXVIFS) |
| 1921 | return -ENOENT; | ||
| 1922 | |||
| 1923 | if (VIF_EXISTS(mrt, c->mfc_parent)) | ||
| 1924 | RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); | ||
| 1625 | 1925 | ||
| 1626 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); | 1926 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); |
| 1627 | 1927 | ||
| 1628 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 1928 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
| 1629 | if (c->mfc_un.res.ttls[ct] < 255) { | 1929 | if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { |
| 1630 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 1930 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) |
| 1631 | goto rtattr_failure; | 1931 | goto rtattr_failure; |
| 1632 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 1932 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
| 1633 | nhp->rtnh_flags = 0; | 1933 | nhp->rtnh_flags = 0; |
| 1634 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 1934 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
| 1635 | nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; | 1935 | nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; |
| 1636 | nhp->rtnh_len = sizeof(*nhp); | 1936 | nhp->rtnh_len = sizeof(*nhp); |
| 1637 | } | 1937 | } |
| 1638 | } | 1938 | } |
| @@ -1650,11 +1950,16 @@ int ipmr_get_route(struct net *net, | |||
| 1650 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) | 1950 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) |
| 1651 | { | 1951 | { |
| 1652 | int err; | 1952 | int err; |
| 1953 | struct mr_table *mrt; | ||
| 1653 | struct mfc_cache *cache; | 1954 | struct mfc_cache *cache; |
| 1654 | struct rtable *rt = skb_rtable(skb); | 1955 | struct rtable *rt = skb_rtable(skb); |
| 1655 | 1956 | ||
| 1957 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | ||
| 1958 | if (mrt == NULL) | ||
| 1959 | return -ENOENT; | ||
| 1960 | |||
| 1656 | read_lock(&mrt_lock); | 1961 | read_lock(&mrt_lock); |
| 1657 | cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); | 1962 | cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); |
| 1658 | 1963 | ||
| 1659 | if (cache == NULL) { | 1964 | if (cache == NULL) { |
| 1660 | struct sk_buff *skb2; | 1965 | struct sk_buff *skb2; |
| @@ -1668,7 +1973,7 @@ int ipmr_get_route(struct net *net, | |||
| 1668 | } | 1973 | } |
| 1669 | 1974 | ||
| 1670 | dev = skb->dev; | 1975 | dev = skb->dev; |
| 1671 | if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { | 1976 | if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { |
| 1672 | read_unlock(&mrt_lock); | 1977 | read_unlock(&mrt_lock); |
| 1673 | return -ENODEV; | 1978 | return -ENODEV; |
| 1674 | } | 1979 | } |
| @@ -1685,24 +1990,107 @@ int ipmr_get_route(struct net *net, | |||
| 1685 | iph->saddr = rt->rt_src; | 1990 | iph->saddr = rt->rt_src; |
| 1686 | iph->daddr = rt->rt_dst; | 1991 | iph->daddr = rt->rt_dst; |
| 1687 | iph->version = 0; | 1992 | iph->version = 0; |
| 1688 | err = ipmr_cache_unresolved(net, vif, skb2); | 1993 | err = ipmr_cache_unresolved(mrt, vif, skb2); |
| 1689 | read_unlock(&mrt_lock); | 1994 | read_unlock(&mrt_lock); |
| 1690 | return err; | 1995 | return err; |
| 1691 | } | 1996 | } |
| 1692 | 1997 | ||
| 1693 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) | 1998 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) |
| 1694 | cache->mfc_flags |= MFC_NOTIFY; | 1999 | cache->mfc_flags |= MFC_NOTIFY; |
| 1695 | err = ipmr_fill_mroute(skb, cache, rtm); | 2000 | err = __ipmr_fill_mroute(mrt, skb, cache, rtm); |
| 1696 | read_unlock(&mrt_lock); | 2001 | read_unlock(&mrt_lock); |
| 1697 | return err; | 2002 | return err; |
| 1698 | } | 2003 | } |
| 1699 | 2004 | ||
| 2005 | static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | ||
| 2006 | u32 pid, u32 seq, struct mfc_cache *c) | ||
| 2007 | { | ||
| 2008 | struct nlmsghdr *nlh; | ||
| 2009 | struct rtmsg *rtm; | ||
| 2010 | |||
| 2011 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); | ||
| 2012 | if (nlh == NULL) | ||
| 2013 | return -EMSGSIZE; | ||
| 2014 | |||
| 2015 | rtm = nlmsg_data(nlh); | ||
| 2016 | rtm->rtm_family = RTNL_FAMILY_IPMR; | ||
| 2017 | rtm->rtm_dst_len = 32; | ||
| 2018 | rtm->rtm_src_len = 32; | ||
| 2019 | rtm->rtm_tos = 0; | ||
| 2020 | rtm->rtm_table = mrt->id; | ||
| 2021 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); | ||
| 2022 | rtm->rtm_type = RTN_MULTICAST; | ||
| 2023 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | ||
| 2024 | rtm->rtm_protocol = RTPROT_UNSPEC; | ||
| 2025 | rtm->rtm_flags = 0; | ||
| 2026 | |||
| 2027 | NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); | ||
| 2028 | NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); | ||
| 2029 | |||
| 2030 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) | ||
| 2031 | goto nla_put_failure; | ||
| 2032 | |||
| 2033 | return nlmsg_end(skb, nlh); | ||
| 2034 | |||
| 2035 | nla_put_failure: | ||
| 2036 | nlmsg_cancel(skb, nlh); | ||
| 2037 | return -EMSGSIZE; | ||
| 2038 | } | ||
| 2039 | |||
| 2040 | static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) | ||
| 2041 | { | ||
| 2042 | struct net *net = sock_net(skb->sk); | ||
| 2043 | struct mr_table *mrt; | ||
| 2044 | struct mfc_cache *mfc; | ||
| 2045 | unsigned int t = 0, s_t; | ||
| 2046 | unsigned int h = 0, s_h; | ||
| 2047 | unsigned int e = 0, s_e; | ||
| 2048 | |||
| 2049 | s_t = cb->args[0]; | ||
| 2050 | s_h = cb->args[1]; | ||
| 2051 | s_e = cb->args[2]; | ||
| 2052 | |||
| 2053 | read_lock(&mrt_lock); | ||
| 2054 | ipmr_for_each_table(mrt, net) { | ||
| 2055 | if (t < s_t) | ||
| 2056 | goto next_table; | ||
| 2057 | if (t > s_t) | ||
| 2058 | s_h = 0; | ||
| 2059 | for (h = s_h; h < MFC_LINES; h++) { | ||
| 2060 | list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { | ||
| 2061 | if (e < s_e) | ||
| 2062 | goto next_entry; | ||
| 2063 | if (ipmr_fill_mroute(mrt, skb, | ||
| 2064 | NETLINK_CB(cb->skb).pid, | ||
| 2065 | cb->nlh->nlmsg_seq, | ||
| 2066 | mfc) < 0) | ||
| 2067 | goto done; | ||
| 2068 | next_entry: | ||
| 2069 | e++; | ||
| 2070 | } | ||
| 2071 | e = s_e = 0; | ||
| 2072 | } | ||
| 2073 | s_h = 0; | ||
| 2074 | next_table: | ||
| 2075 | t++; | ||
| 2076 | } | ||
| 2077 | done: | ||
| 2078 | read_unlock(&mrt_lock); | ||
| 2079 | |||
| 2080 | cb->args[2] = e; | ||
| 2081 | cb->args[1] = h; | ||
| 2082 | cb->args[0] = t; | ||
| 2083 | |||
| 2084 | return skb->len; | ||
| 2085 | } | ||
| 2086 | |||
| 1700 | #ifdef CONFIG_PROC_FS | 2087 | #ifdef CONFIG_PROC_FS |
| 1701 | /* | 2088 | /* |
| 1702 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif | 2089 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif |
| 1703 | */ | 2090 | */ |
| 1704 | struct ipmr_vif_iter { | 2091 | struct ipmr_vif_iter { |
| 1705 | struct seq_net_private p; | 2092 | struct seq_net_private p; |
| 2093 | struct mr_table *mrt; | ||
| 1706 | int ct; | 2094 | int ct; |
| 1707 | }; | 2095 | }; |
| 1708 | 2096 | ||
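
ipmr_rtm_dumproute() follows the standard resumable-dump pattern: cb->args[0..2] persist the table index, hash line, and entry offset across recvmsg() calls, so a dump that fills the skb resumes exactly where it stopped. A hedged sketch of kicking off such a dump from userspace (error handling and the NLM_F_MULTI receive loop omitted; assumes a uapi header new enough to define RTNL_FAMILY_IPMR):

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>

    static int request_ipmr_dump(void)
    {
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        struct {
            struct nlmsghdr nlh;
            struct rtmsg rtm;
        } req;

        memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
        req.nlh.nlmsg_type = RTM_GETROUTE;
        req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
        req.rtm.rtm_family = RTNL_FAMILY_IPMR;

        send(fd, &req, req.nlh.nlmsg_len, 0);
        /* ... recv() multipart replies until NLMSG_DONE ... */
        close(fd);
        return 0;
    }
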
| @@ -1710,11 +2098,13 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net, | |||
| 1710 | struct ipmr_vif_iter *iter, | 2098 | struct ipmr_vif_iter *iter, |
| 1711 | loff_t pos) | 2099 | loff_t pos) |
| 1712 | { | 2100 | { |
| 1713 | for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { | 2101 | struct mr_table *mrt = iter->mrt; |
| 1714 | if (!VIF_EXISTS(net, iter->ct)) | 2102 | |
| 2103 | for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { | ||
| 2104 | if (!VIF_EXISTS(mrt, iter->ct)) | ||
| 1715 | continue; | 2105 | continue; |
| 1716 | if (pos-- == 0) | 2106 | if (pos-- == 0) |
| 1717 | return &net->ipv4.vif_table[iter->ct]; | 2107 | return &mrt->vif_table[iter->ct]; |
| 1718 | } | 2108 | } |
| 1719 | return NULL; | 2109 | return NULL; |
| 1720 | } | 2110 | } |
| @@ -1722,7 +2112,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net, | |||
| 1722 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) | 2112 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) |
| 1723 | __acquires(mrt_lock) | 2113 | __acquires(mrt_lock) |
| 1724 | { | 2114 | { |
| 2115 | struct ipmr_vif_iter *iter = seq->private; | ||
| 1725 | struct net *net = seq_file_net(seq); | 2116 | struct net *net = seq_file_net(seq); |
| 2117 | struct mr_table *mrt; | ||
| 2118 | |||
| 2119 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | ||
| 2120 | if (mrt == NULL) | ||
| 2121 | return ERR_PTR(-ENOENT); | ||
| 2122 | |||
| 2123 | iter->mrt = mrt; | ||
| 1726 | 2124 | ||
| 1727 | read_lock(&mrt_lock); | 2125 | read_lock(&mrt_lock); |
| 1728 | return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) | 2126 | return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) |
| @@ -1733,15 +2131,16 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 1733 | { | 2131 | { |
| 1734 | struct ipmr_vif_iter *iter = seq->private; | 2132 | struct ipmr_vif_iter *iter = seq->private; |
| 1735 | struct net *net = seq_file_net(seq); | 2133 | struct net *net = seq_file_net(seq); |
| 2134 | struct mr_table *mrt = iter->mrt; | ||
| 1736 | 2135 | ||
| 1737 | ++*pos; | 2136 | ++*pos; |
| 1738 | if (v == SEQ_START_TOKEN) | 2137 | if (v == SEQ_START_TOKEN) |
| 1739 | return ipmr_vif_seq_idx(net, iter, 0); | 2138 | return ipmr_vif_seq_idx(net, iter, 0); |
| 1740 | 2139 | ||
| 1741 | while (++iter->ct < net->ipv4.maxvif) { | 2140 | while (++iter->ct < mrt->maxvif) { |
| 1742 | if (!VIF_EXISTS(net, iter->ct)) | 2141 | if (!VIF_EXISTS(mrt, iter->ct)) |
| 1743 | continue; | 2142 | continue; |
| 1744 | return &net->ipv4.vif_table[iter->ct]; | 2143 | return &mrt->vif_table[iter->ct]; |
| 1745 | } | 2144 | } |
| 1746 | return NULL; | 2145 | return NULL; |
| 1747 | } | 2146 | } |
| @@ -1754,7 +2153,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) | |||
| 1754 | 2153 | ||
| 1755 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | 2154 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) |
| 1756 | { | 2155 | { |
| 1757 | struct net *net = seq_file_net(seq); | 2156 | struct ipmr_vif_iter *iter = seq->private; |
| 2157 | struct mr_table *mrt = iter->mrt; | ||
| 1758 | 2158 | ||
| 1759 | if (v == SEQ_START_TOKEN) { | 2159 | if (v == SEQ_START_TOKEN) { |
| 1760 | seq_puts(seq, | 2160 | seq_puts(seq, |
| @@ -1765,7 +2165,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | |||
| 1765 | 2165 | ||
| 1766 | seq_printf(seq, | 2166 | seq_printf(seq, |
| 1767 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", | 2167 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", |
| 1768 | vif - net->ipv4.vif_table, | 2168 | vif - mrt->vif_table, |
| 1769 | name, vif->bytes_in, vif->pkt_in, | 2169 | name, vif->bytes_in, vif->pkt_in, |
| 1770 | vif->bytes_out, vif->pkt_out, | 2170 | vif->bytes_out, vif->pkt_out, |
| 1771 | vif->flags, vif->local, vif->remote); | 2171 | vif->flags, vif->local, vif->remote); |
| @@ -1796,7 +2196,8 @@ static const struct file_operations ipmr_vif_fops = { | |||
| 1796 | 2196 | ||
| 1797 | struct ipmr_mfc_iter { | 2197 | struct ipmr_mfc_iter { |
| 1798 | struct seq_net_private p; | 2198 | struct seq_net_private p; |
| 1799 | struct mfc_cache **cache; | 2199 | struct mr_table *mrt; |
| 2200 | struct list_head *cache; | ||
| 1800 | int ct; | 2201 | int ct; |
| 1801 | }; | 2202 | }; |
| 1802 | 2203 | ||
| @@ -1804,22 +2205,22 @@ struct ipmr_mfc_iter { | |||
| 1804 | static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, | 2205 | static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, |
| 1805 | struct ipmr_mfc_iter *it, loff_t pos) | 2206 | struct ipmr_mfc_iter *it, loff_t pos) |
| 1806 | { | 2207 | { |
| 2208 | struct mr_table *mrt = it->mrt; | ||
| 1807 | struct mfc_cache *mfc; | 2209 | struct mfc_cache *mfc; |
| 1808 | 2210 | ||
| 1809 | it->cache = net->ipv4.mfc_cache_array; | ||
| 1810 | read_lock(&mrt_lock); | 2211 | read_lock(&mrt_lock); |
| 1811 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) | 2212 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { |
| 1812 | for (mfc = net->ipv4.mfc_cache_array[it->ct]; | 2213 | it->cache = &mrt->mfc_cache_array[it->ct]; |
| 1813 | mfc; mfc = mfc->next) | 2214 | list_for_each_entry(mfc, it->cache, list) |
| 1814 | if (pos-- == 0) | 2215 | if (pos-- == 0) |
| 1815 | return mfc; | 2216 | return mfc; |
| 2217 | } | ||
| 1816 | read_unlock(&mrt_lock); | 2218 | read_unlock(&mrt_lock); |
| 1817 | 2219 | ||
| 1818 | it->cache = &mfc_unres_queue; | ||
| 1819 | spin_lock_bh(&mfc_unres_lock); | 2220 | spin_lock_bh(&mfc_unres_lock); |
| 1820 | for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) | 2221 | it->cache = &mrt->mfc_unres_queue; |
| 1821 | if (net_eq(mfc_net(mfc), net) && | 2222 | list_for_each_entry(mfc, it->cache, list) |
| 1822 | pos-- == 0) | 2223 | if (pos-- == 0) |
| 1823 | return mfc; | 2224 | return mfc; |
| 1824 | spin_unlock_bh(&mfc_unres_lock); | 2225 | spin_unlock_bh(&mfc_unres_lock); |
| 1825 | 2226 | ||
| @@ -1832,7 +2233,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) | |||
| 1832 | { | 2233 | { |
| 1833 | struct ipmr_mfc_iter *it = seq->private; | 2234 | struct ipmr_mfc_iter *it = seq->private; |
| 1834 | struct net *net = seq_file_net(seq); | 2235 | struct net *net = seq_file_net(seq); |
| 2236 | struct mr_table *mrt; | ||
| 1835 | 2237 | ||
| 2238 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | ||
| 2239 | if (mrt == NULL) | ||
| 2240 | return ERR_PTR(-ENOENT); | ||
| 2241 | |||
| 2242 | it->mrt = mrt; | ||
| 1836 | it->cache = NULL; | 2243 | it->cache = NULL; |
| 1837 | it->ct = 0; | 2244 | it->ct = 0; |
| 1838 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) | 2245 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) |
| @@ -1844,37 +2251,36 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 1844 | struct mfc_cache *mfc = v; | 2251 | struct mfc_cache *mfc = v; |
| 1845 | struct ipmr_mfc_iter *it = seq->private; | 2252 | struct ipmr_mfc_iter *it = seq->private; |
| 1846 | struct net *net = seq_file_net(seq); | 2253 | struct net *net = seq_file_net(seq); |
| 2254 | struct mr_table *mrt = it->mrt; | ||
| 1847 | 2255 | ||
| 1848 | ++*pos; | 2256 | ++*pos; |
| 1849 | 2257 | ||
| 1850 | if (v == SEQ_START_TOKEN) | 2258 | if (v == SEQ_START_TOKEN) |
| 1851 | return ipmr_mfc_seq_idx(net, seq->private, 0); | 2259 | return ipmr_mfc_seq_idx(net, seq->private, 0); |
| 1852 | 2260 | ||
| 1853 | if (mfc->next) | 2261 | if (mfc->list.next != it->cache) |
| 1854 | return mfc->next; | 2262 | return list_entry(mfc->list.next, struct mfc_cache, list); |
| 1855 | 2263 | ||
| 1856 | if (it->cache == &mfc_unres_queue) | 2264 | if (it->cache == &mrt->mfc_unres_queue) |
| 1857 | goto end_of_list; | 2265 | goto end_of_list; |
| 1858 | 2266 | ||
| 1859 | BUG_ON(it->cache != net->ipv4.mfc_cache_array); | 2267 | BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); |
| 1860 | 2268 | ||
| 1861 | while (++it->ct < MFC_LINES) { | 2269 | while (++it->ct < MFC_LINES) { |
| 1862 | mfc = net->ipv4.mfc_cache_array[it->ct]; | 2270 | it->cache = &mrt->mfc_cache_array[it->ct]; |
| 1863 | if (mfc) | 2271 | if (list_empty(it->cache)) |
| 1864 | return mfc; | 2272 | continue; |
| 2273 | return list_first_entry(it->cache, struct mfc_cache, list); | ||
| 1865 | } | 2274 | } |
| 1866 | 2275 | ||
| 1867 | /* exhausted cache_array, show unresolved */ | 2276 | /* exhausted cache_array, show unresolved */ |
| 1868 | read_unlock(&mrt_lock); | 2277 | read_unlock(&mrt_lock); |
| 1869 | it->cache = &mfc_unres_queue; | 2278 | it->cache = &mrt->mfc_unres_queue; |
| 1870 | it->ct = 0; | 2279 | it->ct = 0; |
| 1871 | 2280 | ||
| 1872 | spin_lock_bh(&mfc_unres_lock); | 2281 | spin_lock_bh(&mfc_unres_lock); |
| 1873 | mfc = mfc_unres_queue; | 2282 | if (!list_empty(it->cache)) |
| 1874 | while (mfc && !net_eq(mfc_net(mfc), net)) | 2283 | return list_first_entry(it->cache, struct mfc_cache, list); |
| 1875 | mfc = mfc->next; | ||
| 1876 | if (mfc) | ||
| 1877 | return mfc; | ||
| 1878 | 2284 | ||
| 1879 | end_of_list: | 2285 | end_of_list: |
| 1880 | spin_unlock_bh(&mfc_unres_lock); | 2286 | spin_unlock_bh(&mfc_unres_lock); |
| @@ -1886,18 +2292,17 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 1886 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) | 2292 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) |
| 1887 | { | 2293 | { |
| 1888 | struct ipmr_mfc_iter *it = seq->private; | 2294 | struct ipmr_mfc_iter *it = seq->private; |
| 1889 | struct net *net = seq_file_net(seq); | 2295 | struct mr_table *mrt = it->mrt; |
| 1890 | 2296 | ||
| 1891 | if (it->cache == &mfc_unres_queue) | 2297 | if (it->cache == &mrt->mfc_unres_queue) |
| 1892 | spin_unlock_bh(&mfc_unres_lock); | 2298 | spin_unlock_bh(&mfc_unres_lock); |
| 1893 | else if (it->cache == net->ipv4.mfc_cache_array) | 2299 | else if (it->cache == &mrt->mfc_cache_array[it->ct]) |
| 1894 | read_unlock(&mrt_lock); | 2300 | read_unlock(&mrt_lock); |
| 1895 | } | 2301 | } |
| 1896 | 2302 | ||
| 1897 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | 2303 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) |
| 1898 | { | 2304 | { |
| 1899 | int n; | 2305 | int n; |
| 1900 | struct net *net = seq_file_net(seq); | ||
| 1901 | 2306 | ||
| 1902 | if (v == SEQ_START_TOKEN) { | 2307 | if (v == SEQ_START_TOKEN) { |
| 1903 | seq_puts(seq, | 2308 | seq_puts(seq, |
| @@ -1905,20 +2310,21 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | |||
| 1905 | } else { | 2310 | } else { |
| 1906 | const struct mfc_cache *mfc = v; | 2311 | const struct mfc_cache *mfc = v; |
| 1907 | const struct ipmr_mfc_iter *it = seq->private; | 2312 | const struct ipmr_mfc_iter *it = seq->private; |
| 2313 | const struct mr_table *mrt = it->mrt; | ||
| 1908 | 2314 | ||
| 1909 | seq_printf(seq, "%08lX %08lX %-3hd", | 2315 | seq_printf(seq, "%08X %08X %-3hd", |
| 1910 | (unsigned long) mfc->mfc_mcastgrp, | 2316 | (__force u32) mfc->mfc_mcastgrp, |
| 1911 | (unsigned long) mfc->mfc_origin, | 2317 | (__force u32) mfc->mfc_origin, |
| 1912 | mfc->mfc_parent); | 2318 | mfc->mfc_parent); |
| 1913 | 2319 | ||
| 1914 | if (it->cache != &mfc_unres_queue) { | 2320 | if (it->cache != &mrt->mfc_unres_queue) { |
| 1915 | seq_printf(seq, " %8lu %8lu %8lu", | 2321 | seq_printf(seq, " %8lu %8lu %8lu", |
| 1916 | mfc->mfc_un.res.pkt, | 2322 | mfc->mfc_un.res.pkt, |
| 1917 | mfc->mfc_un.res.bytes, | 2323 | mfc->mfc_un.res.bytes, |
| 1918 | mfc->mfc_un.res.wrong_if); | 2324 | mfc->mfc_un.res.wrong_if); |
| 1919 | for (n = mfc->mfc_un.res.minvif; | 2325 | for (n = mfc->mfc_un.res.minvif; |
| 1920 | n < mfc->mfc_un.res.maxvif; n++ ) { | 2326 | n < mfc->mfc_un.res.maxvif; n++ ) { |
| 1921 | if (VIF_EXISTS(net, n) && | 2327 | if (VIF_EXISTS(mrt, n) && |
| 1922 | mfc->mfc_un.res.ttls[n] < 255) | 2328 | mfc->mfc_un.res.ttls[n] < 255) |
| 1923 | seq_printf(seq, | 2329 | seq_printf(seq, |
| 1924 | " %2d:%-3d", | 2330 | " %2d:%-3d", |
| @@ -1970,27 +2376,11 @@ static const struct net_protocol pim_protocol = { | |||
| 1970 | */ | 2376 | */ |
| 1971 | static int __net_init ipmr_net_init(struct net *net) | 2377 | static int __net_init ipmr_net_init(struct net *net) |
| 1972 | { | 2378 | { |
| 1973 | int err = 0; | 2379 | int err; |
| 1974 | 2380 | ||
| 1975 | net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), | 2381 | err = ipmr_rules_init(net); |
| 1976 | GFP_KERNEL); | 2382 | if (err < 0) |
| 1977 | if (!net->ipv4.vif_table) { | ||
| 1978 | err = -ENOMEM; | ||
| 1979 | goto fail; | 2383 | goto fail; |
| 1980 | } | ||
| 1981 | |||
| 1982 | /* Forwarding cache */ | ||
| 1983 | net->ipv4.mfc_cache_array = kcalloc(MFC_LINES, | ||
| 1984 | sizeof(struct mfc_cache *), | ||
| 1985 | GFP_KERNEL); | ||
| 1986 | if (!net->ipv4.mfc_cache_array) { | ||
| 1987 | err = -ENOMEM; | ||
| 1988 | goto fail_mfc_cache; | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | #ifdef CONFIG_IP_PIMSM | ||
| 1992 | net->ipv4.mroute_reg_vif_num = -1; | ||
| 1993 | #endif | ||
| 1994 | 2384 | ||
| 1995 | #ifdef CONFIG_PROC_FS | 2385 | #ifdef CONFIG_PROC_FS |
| 1996 | err = -ENOMEM; | 2386 | err = -ENOMEM; |
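
ipmr_net_init() no longer allocates the VIF and MFC arrays inline; ipmr_rules_init() creates the table(s) instead. In the single-table configuration it is just a thin wrapper, roughly:

    #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    static int __net_init ipmr_rules_init(struct net *net)
    {
        net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
        return net->ipv4.mrt ? 0 : -ENOMEM;
    }

    static void __net_exit ipmr_rules_exit(struct net *net)
    {
        /* sketch: the multi-table variant walks and frees the list */
        kfree(net->ipv4.mrt);
    }
    #endif
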
| @@ -2005,10 +2395,8 @@ static int __net_init ipmr_net_init(struct net *net) | |||
| 2005 | proc_cache_fail: | 2395 | proc_cache_fail: |
| 2006 | proc_net_remove(net, "ip_mr_vif"); | 2396 | proc_net_remove(net, "ip_mr_vif"); |
| 2007 | proc_vif_fail: | 2397 | proc_vif_fail: |
| 2008 | kfree(net->ipv4.mfc_cache_array); | 2398 | ipmr_rules_exit(net); |
| 2009 | #endif | 2399 | #endif |
| 2010 | fail_mfc_cache: | ||
| 2011 | kfree(net->ipv4.vif_table); | ||
| 2012 | fail: | 2400 | fail: |
| 2013 | return err; | 2401 | return err; |
| 2014 | } | 2402 | } |
| @@ -2019,8 +2407,7 @@ static void __net_exit ipmr_net_exit(struct net *net) | |||
| 2019 | proc_net_remove(net, "ip_mr_cache"); | 2407 | proc_net_remove(net, "ip_mr_cache"); |
| 2020 | proc_net_remove(net, "ip_mr_vif"); | 2408 | proc_net_remove(net, "ip_mr_vif"); |
| 2021 | #endif | 2409 | #endif |
| 2022 | kfree(net->ipv4.mfc_cache_array); | 2410 | ipmr_rules_exit(net); |
| 2023 | kfree(net->ipv4.vif_table); | ||
| 2024 | } | 2411 | } |
| 2025 | 2412 | ||
| 2026 | static struct pernet_operations ipmr_net_ops = { | 2413 | static struct pernet_operations ipmr_net_ops = { |
| @@ -2043,7 +2430,6 @@ int __init ip_mr_init(void) | |||
| 2043 | if (err) | 2430 | if (err) |
| 2044 | goto reg_pernet_fail; | 2431 | goto reg_pernet_fail; |
| 2045 | 2432 | ||
| 2046 | setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); | ||
| 2047 | err = register_netdevice_notifier(&ip_mr_notifier); | 2433 | err = register_netdevice_notifier(&ip_mr_notifier); |
| 2048 | if (err) | 2434 | if (err) |
| 2049 | goto reg_notif_fail; | 2435 | goto reg_notif_fail; |
| @@ -2054,6 +2440,7 @@ int __init ip_mr_init(void) | |||
| 2054 | goto add_proto_fail; | 2440 | goto add_proto_fail; |
| 2055 | } | 2441 | } |
| 2056 | #endif | 2442 | #endif |
| 2443 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); | ||
| 2057 | return 0; | 2444 | return 0; |
| 2058 | 2445 | ||
| 2059 | #ifdef CONFIG_IP_PIMSM_V2 | 2446 | #ifdef CONFIG_IP_PIMSM_V2 |
| @@ -2061,7 +2448,6 @@ add_proto_fail: | |||
| 2061 | unregister_netdevice_notifier(&ip_mr_notifier); | 2448 | unregister_netdevice_notifier(&ip_mr_notifier); |
| 2062 | #endif | 2449 | #endif |
| 2063 | reg_notif_fail: | 2450 | reg_notif_fail: |
| 2064 | del_timer(&ipmr_expire_timer); | ||
| 2065 | unregister_pernet_subsys(&ipmr_net_ops); | 2451 | unregister_pernet_subsys(&ipmr_net_ops); |
| 2066 | reg_pernet_fail: | 2452 | reg_pernet_fail: |
| 2067 | kmem_cache_destroy(mrt_cachep); | 2453 | kmem_cache_destroy(mrt_cachep); |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c14623fc4d5e..d88a46c54fd1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <linux/netfilter_ipv4.h> | 4 | #include <linux/netfilter_ipv4.h> |
| 5 | #include <linux/ip.h> | 5 | #include <linux/ip.h> |
| 6 | #include <linux/skbuff.h> | 6 | #include <linux/skbuff.h> |
| 7 | #include <linux/gfp.h> | ||
| 7 | #include <net/route.h> | 8 | #include <net/route.h> |
| 8 | #include <net/xfrm.h> | 9 | #include <net/xfrm.h> |
| 9 | #include <net/ip.h> | 10 | #include <net/ip.h> |
| @@ -16,7 +17,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 16 | const struct iphdr *iph = ip_hdr(skb); | 17 | const struct iphdr *iph = ip_hdr(skb); |
| 17 | struct rtable *rt; | 18 | struct rtable *rt; |
| 18 | struct flowi fl = {}; | 19 | struct flowi fl = {}; |
| 19 | struct dst_entry *odst; | 20 | unsigned long orefdst; |
| 20 | unsigned int hh_len; | 21 | unsigned int hh_len; |
| 21 | unsigned int type; | 22 | unsigned int type; |
| 22 | 23 | ||
| @@ -42,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 42 | 43 | ||
| 43 | /* Drop old route. */ | 44 | /* Drop old route. */ |
| 44 | skb_dst_drop(skb); | 45 | skb_dst_drop(skb); |
| 45 | skb_dst_set(skb, &rt->u.dst); | 46 | skb_dst_set(skb, &rt->dst); |
| 46 | } else { | 47 | } else { |
| 47 | /* non-local src, find valid iif to satisfy | 48 | /* non-local src, find valid iif to satisfy |
| 48 | * rp-filter when calling ip_route_input. */ | 49 | * rp-filter when calling ip_route_input. */ |
| @@ -50,14 +51,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 50 | if (ip_route_output_key(net, &rt, &fl) != 0) | 51 | if (ip_route_output_key(net, &rt, &fl) != 0) |
| 51 | return -1; | 52 | return -1; |
| 52 | 53 | ||
| 53 | odst = skb_dst(skb); | 54 | orefdst = skb->_skb_refdst; |
| 54 | if (ip_route_input(skb, iph->daddr, iph->saddr, | 55 | if (ip_route_input(skb, iph->daddr, iph->saddr, |
| 55 | RT_TOS(iph->tos), rt->u.dst.dev) != 0) { | 56 | RT_TOS(iph->tos), rt->dst.dev) != 0) { |
| 56 | dst_release(&rt->u.dst); | 57 | dst_release(&rt->dst); |
| 57 | return -1; | 58 | return -1; |
| 58 | } | 59 | } |
| 59 | dst_release(&rt->u.dst); | 60 | dst_release(&rt->dst); |
| 60 | dst_release(odst); | 61 | refdst_drop(orefdst); |
| 61 | } | 62 | } |
| 62 | 63 | ||
| 63 | if (skb_dst(skb)->error) | 64 | if (skb_dst(skb)->error) |
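
Switching odst from a struct dst_entry pointer to the unsigned long orefdst tracks the skb->_skb_refdst encoding: the low bit flags a non-refcounted ("noref") dst, so the saved value must be released with refdst_drop() rather than a plain dst_release(). Conceptually, from <linux/skbuff.h>:

    #define SKB_DST_NOREF   1UL
    #define SKB_DST_PTRMASK ~(SKB_DST_NOREF)

    static inline void refdst_drop(unsigned long refdst)
    {
        if (!(refdst & SKB_DST_NOREF))
            dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
    }
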
| @@ -211,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, | |||
| 211 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, | 212 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, |
| 212 | skb->len - dataoff, 0); | 213 | skb->len - dataoff, 0); |
| 213 | skb->ip_summed = CHECKSUM_NONE; | 214 | skb->ip_summed = CHECKSUM_NONE; |
| 214 | csum = __skb_checksum_complete_head(skb, dataoff + len); | 215 | return __skb_checksum_complete_head(skb, dataoff + len); |
| 215 | if (!csum) | ||
| 216 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 217 | } | 216 | } |
| 218 | return csum; | 217 | return csum; |
| 219 | } | 218 | } |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 06632762ba5f..e8f4f9a57f12 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | #include <linux/netfilter/x_tables.h> | 28 | #include <linux/netfilter/x_tables.h> |
| 29 | #include <linux/netfilter_arp/arp_tables.h> | 29 | #include <linux/netfilter_arp/arp_tables.h> |
| 30 | #include "../../netfilter/xt_repldata.h" | ||
| 30 | 31 | ||
| 31 | MODULE_LICENSE("GPL"); | 32 | MODULE_LICENSE("GPL"); |
| 32 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); | 33 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); |
| @@ -48,16 +49,17 @@ MODULE_DESCRIPTION("arptables core"); | |||
| 48 | #endif | 49 | #endif |
| 49 | 50 | ||
| 50 | #ifdef CONFIG_NETFILTER_DEBUG | 51 | #ifdef CONFIG_NETFILTER_DEBUG |
| 51 | #define ARP_NF_ASSERT(x) \ | 52 | #define ARP_NF_ASSERT(x) WARN_ON(!(x)) |
| 52 | do { \ | ||
| 53 | if (!(x)) \ | ||
| 54 | printk("ARP_NF_ASSERT: %s:%s:%u\n", \ | ||
| 55 | __func__, __FILE__, __LINE__); \ | ||
| 56 | } while(0) | ||
| 57 | #else | 53 | #else |
| 58 | #define ARP_NF_ASSERT(x) | 54 | #define ARP_NF_ASSERT(x) |
| 59 | #endif | 55 | #endif |
| 60 | 56 | ||
| 57 | void *arpt_alloc_initial_table(const struct xt_table *info) | ||
| 58 | { | ||
| 59 | return xt_alloc_initial_table(arpt, ARPT); | ||
| 60 | } | ||
| 61 | EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); | ||
| 62 | |||
| 61 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, | 63 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, |
| 62 | const char *hdr_addr, int len) | 64 | const char *hdr_addr, int len) |
| 63 | { | 65 | { |
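
arpt_alloc_initial_table() defers to the shared xt_alloc_initial_table() helper declared in xt_repldata.h, which stamps out the boot-time replace blob (one ACCEPT rule per valid hook plus a terminating error rule) that each table used to open-code. The generated object is laid out roughly like the following; the real helper is a macro parameterized on the table type:

    struct arpt_initial_blob {
        struct arpt_replace  repl;       /* name, valid_hooks, hook offsets */
        struct arpt_standard entries[3]; /* an ACCEPT rule per ARP hook */
        struct arpt_error    term;       /* terminating ERROR target */
    };
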
| @@ -217,16 +219,23 @@ static inline int arp_checkentry(const struct arpt_arp *arp) | |||
| 217 | } | 219 | } |
| 218 | 220 | ||
| 219 | static unsigned int | 221 | static unsigned int |
| 220 | arpt_error(struct sk_buff *skb, const struct xt_target_param *par) | 222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) |
| 221 | { | 223 | { |
| 222 | if (net_ratelimit()) | 224 | if (net_ratelimit()) |
| 223 | printk("arp_tables: error: '%s'\n", | 225 | pr_err("arp_tables: error: '%s'\n", |
| 224 | (const char *)par->targinfo); | 226 | (const char *)par->targinfo); |
| 225 | 227 | ||
| 226 | return NF_DROP; | 228 | return NF_DROP; |
| 227 | } | 229 | } |
| 228 | 230 | ||
| 229 | static inline struct arpt_entry *get_entry(void *base, unsigned int offset) | 231 | static inline const struct arpt_entry_target * |
| 232 | arpt_get_target_c(const struct arpt_entry *e) | ||
| 233 | { | ||
| 234 | return arpt_get_target((struct arpt_entry *)e); | ||
| 235 | } | ||
| 236 | |||
| 237 | static inline struct arpt_entry * | ||
| 238 | get_entry(const void *base, unsigned int offset) | ||
| 230 | { | 239 | { |
| 231 | return (struct arpt_entry *)(base + offset); | 240 | return (struct arpt_entry *)(base + offset); |
| 232 | } | 241 | } |
| @@ -246,12 +255,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 246 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 255 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
| 247 | unsigned int verdict = NF_DROP; | 256 | unsigned int verdict = NF_DROP; |
| 248 | const struct arphdr *arp; | 257 | const struct arphdr *arp; |
| 249 | bool hotdrop = false; | ||
| 250 | struct arpt_entry *e, *back; | 258 | struct arpt_entry *e, *back; |
| 251 | const char *indev, *outdev; | 259 | const char *indev, *outdev; |
| 252 | void *table_base; | 260 | void *table_base; |
| 253 | const struct xt_table_info *private; | 261 | const struct xt_table_info *private; |
| 254 | struct xt_target_param tgpar; | 262 | struct xt_action_param acpar; |
| 255 | 263 | ||
| 256 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) | 264 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) |
| 257 | return NF_DROP; | 265 | return NF_DROP; |
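
The separate xt_target_param plus on-stack hotdrop flag merge into one struct xt_action_param, shared by matches and targets, with hotdrop carried inside it. Its approximate definition in <linux/netfilter/x_tables.h> after this series:

    struct xt_action_param {
        union {
            const struct xt_match  *match;
            const struct xt_target *target;
        };
        union {
            const void *matchinfo, *targinfo;
        };
        const struct net_device *in, *out;
        int          fragoff;
        unsigned int thoff;
        unsigned int hooknum;
        u_int8_t     family;
        bool         hotdrop;
    };
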
| @@ -266,26 +274,24 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 266 | e = get_entry(table_base, private->hook_entry[hook]); | 274 | e = get_entry(table_base, private->hook_entry[hook]); |
| 267 | back = get_entry(table_base, private->underflow[hook]); | 275 | back = get_entry(table_base, private->underflow[hook]); |
| 268 | 276 | ||
| 269 | tgpar.in = in; | 277 | acpar.in = in; |
| 270 | tgpar.out = out; | 278 | acpar.out = out; |
| 271 | tgpar.hooknum = hook; | 279 | acpar.hooknum = hook; |
| 272 | tgpar.family = NFPROTO_ARP; | 280 | acpar.family = NFPROTO_ARP; |
| 281 | acpar.hotdrop = false; | ||
| 273 | 282 | ||
| 274 | arp = arp_hdr(skb); | 283 | arp = arp_hdr(skb); |
| 275 | do { | 284 | do { |
| 276 | struct arpt_entry_target *t; | 285 | const struct arpt_entry_target *t; |
| 277 | int hdr_len; | ||
| 278 | 286 | ||
| 279 | if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { | 287 | if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { |
| 280 | e = arpt_next_entry(e); | 288 | e = arpt_next_entry(e); |
| 281 | continue; | 289 | continue; |
| 282 | } | 290 | } |
| 283 | 291 | ||
| 284 | hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + | 292 | ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); |
| 285 | (2 * skb->dev->addr_len); | ||
| 286 | ADD_COUNTER(e->counters, hdr_len, 1); | ||
| 287 | 293 | ||
| 288 | t = arpt_get_target(e); | 294 | t = arpt_get_target_c(e); |
| 289 | 295 | ||
| 290 | /* Standard target? */ | 296 | /* Standard target? */ |
| 291 | if (!t->u.kernel.target->target) { | 297 | if (!t->u.kernel.target->target) { |
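
The removed hdr_len arithmetic is exactly what arp_hdr_len() computes (ARP header, two hardware addresses, two IPv4 addresses), and the same helper already bounds the pskb_may_pull() at entry, so the counter now matches the pulled length. Approximately, from <linux/if_arp.h>:

    static inline int arp_hdr_len(struct net_device *dev)
    {
        /* ARP header, plus 2 device addresses, plus 2 IP addresses */
        return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2;
    }
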
| @@ -319,9 +325,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 319 | /* Targets which reenter must return | 325 | /* Targets which reenter must return |
| 320 | * abs. verdicts | 326 | * abs. verdicts |
| 321 | */ | 327 | */ |
| 322 | tgpar.target = t->u.kernel.target; | 328 | acpar.target = t->u.kernel.target; |
| 323 | tgpar.targinfo = t->data; | 329 | acpar.targinfo = t->data; |
| 324 | verdict = t->u.kernel.target->target(skb, &tgpar); | 330 | verdict = t->u.kernel.target->target(skb, &acpar); |
| 325 | 331 | ||
| 326 | /* Target might have changed stuff. */ | 332 | /* Target might have changed stuff. */ |
| 327 | arp = arp_hdr(skb); | 333 | arp = arp_hdr(skb); |
| @@ -331,10 +337,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
| 331 | else | 337 | else |
| 332 | /* Verdict */ | 338 | /* Verdict */ |
| 333 | break; | 339 | break; |
| 334 | } while (!hotdrop); | 340 | } while (!acpar.hotdrop); |
| 335 | xt_info_rdunlock_bh(); | 341 | xt_info_rdunlock_bh(); |
| 336 | 342 | ||
| 337 | if (hotdrop) | 343 | if (acpar.hotdrop) |
| 338 | return NF_DROP; | 344 | return NF_DROP; |
| 339 | else | 345 | else |
| 340 | return verdict; | 346 | return verdict; |
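The hunk above folds the per-call hotdrop flag into the shared parameter block: struct xt_target_param becomes struct xt_action_param, which carries the in/out devices, hook number and family as before, plus the hotdrop flag that used to live in a separate stack variable. Targets can thus request an immediate drop through the same structure they already receive. A minimal sketch of the resulting control flow, using only the fields visible above:

    struct xt_action_param acpar = {
            .in      = in,
            .out     = out,
            .hooknum = hook,
            .family  = NFPROTO_ARP,
            .hotdrop = false,
    };

    do {
            /* rule evaluation; the target may set acpar.hotdrop = true */
            verdict = t->u.kernel.target->target(skb, &acpar);
    } while (!acpar.hotdrop);

    return acpar.hotdrop ? NF_DROP : verdict;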
| @@ -351,7 +357,7 @@ static inline bool unconditional(const struct arpt_arp *arp) | |||
| 351 | /* Figures out from what hook each rule can be called: returns 0 if | 357 | /* Figures out from what hook each rule can be called: returns 0 if |
| 352 | * there are loops. Puts hook bitmask in comefrom. | 358 | * there are loops. Puts hook bitmask in comefrom. |
| 353 | */ | 359 | */ |
| 354 | static int mark_source_chains(struct xt_table_info *newinfo, | 360 | static int mark_source_chains(const struct xt_table_info *newinfo, |
| 355 | unsigned int valid_hooks, void *entry0) | 361 | unsigned int valid_hooks, void *entry0) |
| 356 | { | 362 | { |
| 357 | unsigned int hook; | 363 | unsigned int hook; |
| @@ -372,11 +378,11 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
| 372 | 378 | ||
| 373 | for (;;) { | 379 | for (;;) { |
| 374 | const struct arpt_standard_target *t | 380 | const struct arpt_standard_target *t |
| 375 | = (void *)arpt_get_target(e); | 381 | = (void *)arpt_get_target_c(e); |
| 376 | int visited = e->comefrom & (1 << hook); | 382 | int visited = e->comefrom & (1 << hook); |
| 377 | 383 | ||
| 378 | if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { | 384 | if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { |
| 379 | printk("arptables: loop hook %u pos %u %08X.\n", | 385 | pr_notice("arptables: loop hook %u pos %u %08X.\n", |
| 380 | hook, pos, e->comefrom); | 386 | hook, pos, e->comefrom); |
| 381 | return 0; | 387 | return 0; |
| 382 | } | 388 | } |
| @@ -456,7 +462,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
| 456 | return 1; | 462 | return 1; |
| 457 | } | 463 | } |
| 458 | 464 | ||
| 459 | static inline int check_entry(struct arpt_entry *e, const char *name) | 465 | static inline int check_entry(const struct arpt_entry *e, const char *name) |
| 460 | { | 466 | { |
| 461 | const struct arpt_entry_target *t; | 467 | const struct arpt_entry_target *t; |
| 462 | 468 | ||
| @@ -468,7 +474,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) | |||
| 468 | if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) | 474 | if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) |
| 469 | return -EINVAL; | 475 | return -EINVAL; |
| 470 | 476 | ||
| 471 | t = arpt_get_target(e); | 477 | t = arpt_get_target_c(e); |
| 472 | if (e->target_offset + t->u.target_size > e->next_offset) | 478 | if (e->target_offset + t->u.target_size > e->next_offset) |
| 473 | return -EINVAL; | 479 | return -EINVAL; |
| 474 | 480 | ||
| @@ -498,8 +504,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) | |||
| 498 | } | 504 | } |
| 499 | 505 | ||
| 500 | static inline int | 506 | static inline int |
| 501 | find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | 507 | find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) |
| 502 | unsigned int *i) | ||
| 503 | { | 508 | { |
| 504 | struct arpt_entry_target *t; | 509 | struct arpt_entry_target *t; |
| 505 | struct xt_target *target; | 510 | struct xt_target *target; |
| @@ -510,13 +515,11 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | |||
| 510 | return ret; | 515 | return ret; |
| 511 | 516 | ||
| 512 | t = arpt_get_target(e); | 517 | t = arpt_get_target(e); |
| 513 | target = try_then_request_module(xt_find_target(NFPROTO_ARP, | 518 | target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, |
| 514 | t->u.user.name, | 519 | t->u.user.revision); |
| 515 | t->u.user.revision), | 520 | if (IS_ERR(target)) { |
| 516 | "arpt_%s", t->u.user.name); | ||
| 517 | if (IS_ERR(target) || !target) { | ||
| 518 | duprintf("find_check_entry: `%s' not found\n", t->u.user.name); | 521 | duprintf("find_check_entry: `%s' not found\n", t->u.user.name); |
| 519 | ret = target ? PTR_ERR(target) : -ENOENT; | 522 | ret = PTR_ERR(target); |
| 520 | goto out; | 523 | goto out; |
| 521 | } | 524 | } |
| 522 | t->u.kernel.target = target; | 525 | t->u.kernel.target = target; |
| @@ -524,8 +527,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | |||
| 524 | ret = check_target(e, name); | 527 | ret = check_target(e, name); |
| 525 | if (ret) | 528 | if (ret) |
| 526 | goto err; | 529 | goto err; |
| 527 | |||
| 528 | (*i)++; | ||
| 529 | return 0; | 530 | return 0; |
| 530 | err: | 531 | err: |
| 531 | module_put(t->u.kernel.target->me); | 532 | module_put(t->u.kernel.target->me); |
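The module-loading dance around target lookup is also simplified: the open-coded try_then_request_module() construct, which could yield either an ERR_PTR or NULL, is replaced by xt_request_find_target(), which requests "arpt_<name>" internally and never returns NULL, so callers need only the IS_ERR()/PTR_ERR() pair. A sketch of the before/after error handling (signatures as used in the hunk above):

    /* before: two failure shapes had to be folded together */
    target = try_then_request_module(
                    xt_find_target(NFPROTO_ARP, name, revision),
                    "arpt_%s", name);
    if (IS_ERR(target) || !target)
            return target ? PTR_ERR(target) : -ENOENT;

    /* after: a valid pointer or an ERR_PTR(), nothing else */
    target = xt_request_find_target(NFPROTO_ARP, name, revision);
    if (IS_ERR(target))
            return PTR_ERR(target);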
| @@ -533,14 +534,14 @@ out: | |||
| 533 | return ret; | 534 | return ret; |
| 534 | } | 535 | } |
| 535 | 536 | ||
| 536 | static bool check_underflow(struct arpt_entry *e) | 537 | static bool check_underflow(const struct arpt_entry *e) |
| 537 | { | 538 | { |
| 538 | const struct arpt_entry_target *t; | 539 | const struct arpt_entry_target *t; |
| 539 | unsigned int verdict; | 540 | unsigned int verdict; |
| 540 | 541 | ||
| 541 | if (!unconditional(&e->arp)) | 542 | if (!unconditional(&e->arp)) |
| 542 | return false; | 543 | return false; |
| 543 | t = arpt_get_target(e); | 544 | t = arpt_get_target_c(e); |
| 544 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) | 545 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) |
| 545 | return false; | 546 | return false; |
| 546 | verdict = ((struct arpt_standard_target *)t)->verdict; | 547 | verdict = ((struct arpt_standard_target *)t)->verdict; |
| @@ -550,12 +551,11 @@ static bool check_underflow(struct arpt_entry *e) | |||
| 550 | 551 | ||
| 551 | static inline int check_entry_size_and_hooks(struct arpt_entry *e, | 552 | static inline int check_entry_size_and_hooks(struct arpt_entry *e, |
| 552 | struct xt_table_info *newinfo, | 553 | struct xt_table_info *newinfo, |
| 553 | unsigned char *base, | 554 | const unsigned char *base, |
| 554 | unsigned char *limit, | 555 | const unsigned char *limit, |
| 555 | const unsigned int *hook_entries, | 556 | const unsigned int *hook_entries, |
| 556 | const unsigned int *underflows, | 557 | const unsigned int *underflows, |
| 557 | unsigned int valid_hooks, | 558 | unsigned int valid_hooks) |
| 558 | unsigned int *i) | ||
| 559 | { | 559 | { |
| 560 | unsigned int h; | 560 | unsigned int h; |
| 561 | 561 | ||
| @@ -592,19 +592,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, | |||
| 592 | /* Clear counters and comefrom */ | 592 | /* Clear counters and comefrom */ |
| 593 | e->counters = ((struct xt_counters) { 0, 0 }); | 593 | e->counters = ((struct xt_counters) { 0, 0 }); |
| 594 | e->comefrom = 0; | 594 | e->comefrom = 0; |
| 595 | |||
| 596 | (*i)++; | ||
| 597 | return 0; | 595 | return 0; |
| 598 | } | 596 | } |
| 599 | 597 | ||
| 600 | static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | 598 | static inline void cleanup_entry(struct arpt_entry *e) |
| 601 | { | 599 | { |
| 602 | struct xt_tgdtor_param par; | 600 | struct xt_tgdtor_param par; |
| 603 | struct arpt_entry_target *t; | 601 | struct arpt_entry_target *t; |
| 604 | 602 | ||
| 605 | if (i && (*i)-- == 0) | ||
| 606 | return 1; | ||
| 607 | |||
| 608 | t = arpt_get_target(e); | 603 | t = arpt_get_target(e); |
| 609 | par.target = t->u.kernel.target; | 604 | par.target = t->u.kernel.target; |
| 610 | par.targinfo = t->data; | 605 | par.targinfo = t->data; |
| @@ -612,26 +607,20 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | |||
| 612 | if (par.target->destroy != NULL) | 607 | if (par.target->destroy != NULL) |
| 613 | par.target->destroy(&par); | 608 | par.target->destroy(&par); |
| 614 | module_put(par.target->me); | 609 | module_put(par.target->me); |
| 615 | return 0; | ||
| 616 | } | 610 | } |
| 617 | 611 | ||
| 618 | /* Checks and translates the user-supplied table segment (held in | 612 | /* Checks and translates the user-supplied table segment (held in |
| 619 | * newinfo). | 613 | * newinfo). |
| 620 | */ | 614 | */ |
| 621 | static int translate_table(const char *name, | 615 | static int translate_table(struct xt_table_info *newinfo, void *entry0, |
| 622 | unsigned int valid_hooks, | 616 | const struct arpt_replace *repl) |
| 623 | struct xt_table_info *newinfo, | ||
| 624 | void *entry0, | ||
| 625 | unsigned int size, | ||
| 626 | unsigned int number, | ||
| 627 | const unsigned int *hook_entries, | ||
| 628 | const unsigned int *underflows) | ||
| 629 | { | 617 | { |
| 618 | struct arpt_entry *iter; | ||
| 630 | unsigned int i; | 619 | unsigned int i; |
| 631 | int ret; | 620 | int ret = 0; |
| 632 | 621 | ||
| 633 | newinfo->size = size; | 622 | newinfo->size = repl->size; |
| 634 | newinfo->number = number; | 623 | newinfo->number = repl->num_entries; |
| 635 | 624 | ||
| 636 | /* Init all hooks to impossible value. */ | 625 | /* Init all hooks to impossible value. */ |
| 637 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { | 626 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { |
| @@ -643,52 +632,66 @@ static int translate_table(const char *name, | |||
| 643 | i = 0; | 632 | i = 0; |
| 644 | 633 | ||
| 645 | /* Walk through entries, checking offsets. */ | 634 | /* Walk through entries, checking offsets. */ |
| 646 | ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, | 635 | xt_entry_foreach(iter, entry0, newinfo->size) { |
| 647 | check_entry_size_and_hooks, | 636 | ret = check_entry_size_and_hooks(iter, newinfo, entry0, |
| 648 | newinfo, | 637 | entry0 + repl->size, |
| 649 | entry0, | 638 | repl->hook_entry, |
| 650 | entry0 + size, | 639 | repl->underflow, |
| 651 | hook_entries, underflows, valid_hooks, &i); | 640 | repl->valid_hooks); |
| 641 | if (ret != 0) | ||
| 642 | break; | ||
| 643 | ++i; | ||
| 644 | if (strcmp(arpt_get_target(iter)->u.user.name, | ||
| 645 | XT_ERROR_TARGET) == 0) | ||
| 646 | ++newinfo->stacksize; | ||
| 647 | } | ||
| 652 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); | 648 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); |
| 653 | if (ret != 0) | 649 | if (ret != 0) |
| 654 | return ret; | 650 | return ret; |
| 655 | 651 | ||
| 656 | if (i != number) { | 652 | if (i != repl->num_entries) { |
| 657 | duprintf("translate_table: %u not %u entries\n", | 653 | duprintf("translate_table: %u not %u entries\n", |
| 658 | i, number); | 654 | i, repl->num_entries); |
| 659 | return -EINVAL; | 655 | return -EINVAL; |
| 660 | } | 656 | } |
| 661 | 657 | ||
| 662 | /* Check hooks all assigned */ | 658 | /* Check hooks all assigned */ |
| 663 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { | 659 | for (i = 0; i < NF_ARP_NUMHOOKS; i++) { |
| 664 | /* Only hooks which are valid */ | 660 | /* Only hooks which are valid */ |
| 665 | if (!(valid_hooks & (1 << i))) | 661 | if (!(repl->valid_hooks & (1 << i))) |
| 666 | continue; | 662 | continue; |
| 667 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { | 663 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { |
| 668 | duprintf("Invalid hook entry %u %u\n", | 664 | duprintf("Invalid hook entry %u %u\n", |
| 669 | i, hook_entries[i]); | 665 | i, repl->hook_entry[i]); |
| 670 | return -EINVAL; | 666 | return -EINVAL; |
| 671 | } | 667 | } |
| 672 | if (newinfo->underflow[i] == 0xFFFFFFFF) { | 668 | if (newinfo->underflow[i] == 0xFFFFFFFF) { |
| 673 | duprintf("Invalid underflow %u %u\n", | 669 | duprintf("Invalid underflow %u %u\n", |
| 674 | i, underflows[i]); | 670 | i, repl->underflow[i]); |
| 675 | return -EINVAL; | 671 | return -EINVAL; |
| 676 | } | 672 | } |
| 677 | } | 673 | } |
| 678 | 674 | ||
| 679 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) { | 675 | if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { |
| 680 | duprintf("Looping hook\n"); | 676 | duprintf("Looping hook\n"); |
| 681 | return -ELOOP; | 677 | return -ELOOP; |
| 682 | } | 678 | } |
| 683 | 679 | ||
| 684 | /* Finally, each sanity check must pass */ | 680 | /* Finally, each sanity check must pass */ |
| 685 | i = 0; | 681 | i = 0; |
| 686 | ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, | 682 | xt_entry_foreach(iter, entry0, newinfo->size) { |
| 687 | find_check_entry, name, size, &i); | 683 | ret = find_check_entry(iter, repl->name, repl->size); |
| 684 | if (ret != 0) | ||
| 685 | break; | ||
| 686 | ++i; | ||
| 687 | } | ||
| 688 | 688 | ||
| 689 | if (ret != 0) { | 689 | if (ret != 0) { |
| 690 | ARPT_ENTRY_ITERATE(entry0, newinfo->size, | 690 | xt_entry_foreach(iter, entry0, newinfo->size) { |
| 691 | cleanup_entry, &i); | 691 | if (i-- == 0) |
| 692 | break; | ||
| 693 | cleanup_entry(iter); | ||
| 694 | } | ||
| 692 | return ret; | 695 | return ret; |
| 693 | } | 696 | } |
| 694 | 697 | ||
| @@ -701,33 +704,13 @@ static int translate_table(const char *name, | |||
| 701 | return ret; | 704 | return ret; |
| 702 | } | 705 | } |
| 703 | 706 | ||
| 704 | /* Gets counters. */ | ||
| 705 | static inline int add_entry_to_counter(const struct arpt_entry *e, | ||
| 706 | struct xt_counters total[], | ||
| 707 | unsigned int *i) | ||
| 708 | { | ||
| 709 | ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
| 710 | |||
| 711 | (*i)++; | ||
| 712 | return 0; | ||
| 713 | } | ||
| 714 | |||
| 715 | static inline int set_entry_to_counter(const struct arpt_entry *e, | ||
| 716 | struct xt_counters total[], | ||
| 717 | unsigned int *i) | ||
| 718 | { | ||
| 719 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
| 720 | |||
| 721 | (*i)++; | ||
| 722 | return 0; | ||
| 723 | } | ||
| 724 | |||
| 725 | static void get_counters(const struct xt_table_info *t, | 707 | static void get_counters(const struct xt_table_info *t, |
| 726 | struct xt_counters counters[]) | 708 | struct xt_counters counters[]) |
| 727 | { | 709 | { |
| 710 | struct arpt_entry *iter; | ||
| 728 | unsigned int cpu; | 711 | unsigned int cpu; |
| 729 | unsigned int i; | 712 | unsigned int i; |
| 730 | unsigned int curcpu; | 713 | unsigned int curcpu = get_cpu(); |
| 731 | 714 | ||
| 732 | /* Instead of clearing (by a previous call to memset()) | 715 | /* Instead of clearing (by a previous call to memset()) |
| 733 | * the counters and using adds, we set the counters | 716 | * the counters and using adds, we set the counters |
| @@ -737,42 +720,46 @@ static void get_counters(const struct xt_table_info *t, | |||
| 737 | * if new softirq were to run and call ipt_do_table | 720 | * if new softirq were to run and call ipt_do_table |
| 738 | */ | 721 | */ |
| 739 | local_bh_disable(); | 722 | local_bh_disable(); |
| 740 | curcpu = smp_processor_id(); | ||
| 741 | |||
| 742 | i = 0; | 723 | i = 0; |
| 743 | ARPT_ENTRY_ITERATE(t->entries[curcpu], | 724 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { |
| 744 | t->size, | 725 | SET_COUNTER(counters[i], iter->counters.bcnt, |
| 745 | set_entry_to_counter, | 726 | iter->counters.pcnt); |
| 746 | counters, | 727 | ++i; |
| 747 | &i); | 728 | } |
| 729 | local_bh_enable(); | ||
| 730 | /* Processing counters from other cpus, we can let bottom half enabled, | ||
| 731 | * (preemption is disabled) | ||
| 732 | */ | ||
| 748 | 733 | ||
| 749 | for_each_possible_cpu(cpu) { | 734 | for_each_possible_cpu(cpu) { |
| 750 | if (cpu == curcpu) | 735 | if (cpu == curcpu) |
| 751 | continue; | 736 | continue; |
| 752 | i = 0; | 737 | i = 0; |
| 738 | local_bh_disable(); | ||
| 753 | xt_info_wrlock(cpu); | 739 | xt_info_wrlock(cpu); |
| 754 | ARPT_ENTRY_ITERATE(t->entries[cpu], | 740 | xt_entry_foreach(iter, t->entries[cpu], t->size) { |
| 755 | t->size, | 741 | ADD_COUNTER(counters[i], iter->counters.bcnt, |
| 756 | add_entry_to_counter, | 742 | iter->counters.pcnt); |
| 757 | counters, | 743 | ++i; |
| 758 | &i); | 744 | } |
| 759 | xt_info_wrunlock(cpu); | 745 | xt_info_wrunlock(cpu); |
| 746 | local_bh_enable(); | ||
| 760 | } | 747 | } |
| 761 | local_bh_enable(); | 748 | put_cpu(); |
| 762 | } | 749 | } |
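get_counters() is reworked around get_cpu()/put_cpu(): instead of disabling bottom halves for the whole scan, it pins the task to one CPU, seeds the totals from that CPU's counters with SET_COUNTER() under local_bh_disable(), then folds in every other CPU with ADD_COUNTER() under that CPU's xt_info write lock, re-taking local_bh_disable() per pass. The two former callbacks (set_entry_to_counter, add_entry_to_counter) are inlined away. A condensed sketch of the shape (names and helpers as in the hunk above):

    unsigned int i = 0, cpu;
    unsigned int curcpu = get_cpu();        /* disables preemption */
    struct arpt_entry *iter;

    local_bh_disable();
    xt_entry_foreach(iter, t->entries[curcpu], t->size) {
            SET_COUNTER(counters[i], iter->counters.bcnt,
                        iter->counters.pcnt);
            ++i;                /* kept outside the macro: SET_COUNTER
                                 * evaluates its first argument twice */
    }
    local_bh_enable();

    for_each_possible_cpu(cpu) {
            if (cpu == curcpu)
                    continue;
            i = 0;
            local_bh_disable();
            xt_info_wrlock(cpu);
            xt_entry_foreach(iter, t->entries[cpu], t->size) {
                    ADD_COUNTER(counters[i], iter->counters.bcnt,
                                iter->counters.pcnt);
                    ++i;
            }
            xt_info_wrunlock(cpu);
            local_bh_enable();
    }
    put_cpu();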
| 763 | 750 | ||
| 764 | static struct xt_counters *alloc_counters(struct xt_table *table) | 751 | static struct xt_counters *alloc_counters(const struct xt_table *table) |
| 765 | { | 752 | { |
| 766 | unsigned int countersize; | 753 | unsigned int countersize; |
| 767 | struct xt_counters *counters; | 754 | struct xt_counters *counters; |
| 768 | struct xt_table_info *private = table->private; | 755 | const struct xt_table_info *private = table->private; |
| 769 | 756 | ||
| 770 | /* We need atomic snapshot of counters: rest doesn't change | 757 | /* We need atomic snapshot of counters: rest doesn't change |
| 771 | * (other than comefrom, which userspace doesn't care | 758 | * (other than comefrom, which userspace doesn't care |
| 772 | * about). | 759 | * about). |
| 773 | */ | 760 | */ |
| 774 | countersize = sizeof(struct xt_counters) * private->number; | 761 | countersize = sizeof(struct xt_counters) * private->number; |
| 775 | counters = vmalloc_node(countersize, numa_node_id()); | 762 | counters = vmalloc(countersize); |
| 776 | 763 | ||
| 777 | if (counters == NULL) | 764 | if (counters == NULL) |
| 778 | return ERR_PTR(-ENOMEM); | 765 | return ERR_PTR(-ENOMEM); |
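A smaller change in the same area: the counter buffers drop vmalloc_node(..., numa_node_id()) in favor of plain vmalloc(), here and again in __do_replace() and do_add_counters() below. The snapshot is written once and immediately copied out to userspace, so pinning it to the local NUMA node buys nothing:

    counters = vmalloc(countersize);   /* placement is irrelevant for a
                                          write-once, copy-out buffer */
    if (counters == NULL)
            return ERR_PTR(-ENOMEM);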
| @@ -783,11 +770,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) | |||
| 783 | } | 770 | } |
| 784 | 771 | ||
| 785 | static int copy_entries_to_user(unsigned int total_size, | 772 | static int copy_entries_to_user(unsigned int total_size, |
| 786 | struct xt_table *table, | 773 | const struct xt_table *table, |
| 787 | void __user *userptr) | 774 | void __user *userptr) |
| 788 | { | 775 | { |
| 789 | unsigned int off, num; | 776 | unsigned int off, num; |
| 790 | struct arpt_entry *e; | 777 | const struct arpt_entry *e; |
| 791 | struct xt_counters *counters; | 778 | struct xt_counters *counters; |
| 792 | struct xt_table_info *private = table->private; | 779 | struct xt_table_info *private = table->private; |
| 793 | int ret = 0; | 780 | int ret = 0; |
| @@ -807,7 +794,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
| 807 | /* FIXME: use iterator macros --RR */ | 794 | /* FIXME: use iterator macros --RR */ |
| 808 | /* ... then go back and fix counters and names */ | 795 | /* ... then go back and fix counters and names */ |
| 809 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ | 796 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ |
| 810 | struct arpt_entry_target *t; | 797 | const struct arpt_entry_target *t; |
| 811 | 798 | ||
| 812 | e = (struct arpt_entry *)(loc_cpu_entry + off); | 799 | e = (struct arpt_entry *)(loc_cpu_entry + off); |
| 813 | if (copy_to_user(userptr + off | 800 | if (copy_to_user(userptr + off |
| @@ -818,7 +805,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
| 818 | goto free_counters; | 805 | goto free_counters; |
| 819 | } | 806 | } |
| 820 | 807 | ||
| 821 | t = arpt_get_target(e); | 808 | t = arpt_get_target_c(e); |
| 822 | if (copy_to_user(userptr + off + e->target_offset | 809 | if (copy_to_user(userptr + off + e->target_offset |
| 823 | + offsetof(struct arpt_entry_target, | 810 | + offsetof(struct arpt_entry_target, |
| 824 | u.user.name), | 811 | u.user.name), |
| @@ -835,7 +822,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
| 835 | } | 822 | } |
| 836 | 823 | ||
| 837 | #ifdef CONFIG_COMPAT | 824 | #ifdef CONFIG_COMPAT |
| 838 | static void compat_standard_from_user(void *dst, void *src) | 825 | static void compat_standard_from_user(void *dst, const void *src) |
| 839 | { | 826 | { |
| 840 | int v = *(compat_int_t *)src; | 827 | int v = *(compat_int_t *)src; |
| 841 | 828 | ||
| @@ -844,7 +831,7 @@ static void compat_standard_from_user(void *dst, void *src) | |||
| 844 | memcpy(dst, &v, sizeof(v)); | 831 | memcpy(dst, &v, sizeof(v)); |
| 845 | } | 832 | } |
| 846 | 833 | ||
| 847 | static int compat_standard_to_user(void __user *dst, void *src) | 834 | static int compat_standard_to_user(void __user *dst, const void *src) |
| 848 | { | 835 | { |
| 849 | compat_int_t cv = *(int *)src; | 836 | compat_int_t cv = *(int *)src; |
| 850 | 837 | ||
| @@ -853,18 +840,18 @@ static int compat_standard_to_user(void __user *dst, void *src) | |||
| 853 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; | 840 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; |
| 854 | } | 841 | } |
| 855 | 842 | ||
| 856 | static int compat_calc_entry(struct arpt_entry *e, | 843 | static int compat_calc_entry(const struct arpt_entry *e, |
| 857 | const struct xt_table_info *info, | 844 | const struct xt_table_info *info, |
| 858 | void *base, struct xt_table_info *newinfo) | 845 | const void *base, struct xt_table_info *newinfo) |
| 859 | { | 846 | { |
| 860 | struct arpt_entry_target *t; | 847 | const struct arpt_entry_target *t; |
| 861 | unsigned int entry_offset; | 848 | unsigned int entry_offset; |
| 862 | int off, i, ret; | 849 | int off, i, ret; |
| 863 | 850 | ||
| 864 | off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); | 851 | off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); |
| 865 | entry_offset = (void *)e - base; | 852 | entry_offset = (void *)e - base; |
| 866 | 853 | ||
| 867 | t = arpt_get_target(e); | 854 | t = arpt_get_target_c(e); |
| 868 | off += xt_compat_target_offset(t->u.kernel.target); | 855 | off += xt_compat_target_offset(t->u.kernel.target); |
| 869 | newinfo->size -= off; | 856 | newinfo->size -= off; |
| 870 | ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); | 857 | ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); |
| @@ -885,7 +872,9 @@ static int compat_calc_entry(struct arpt_entry *e, | |||
| 885 | static int compat_table_info(const struct xt_table_info *info, | 872 | static int compat_table_info(const struct xt_table_info *info, |
| 886 | struct xt_table_info *newinfo) | 873 | struct xt_table_info *newinfo) |
| 887 | { | 874 | { |
| 875 | struct arpt_entry *iter; | ||
| 888 | void *loc_cpu_entry; | 876 | void *loc_cpu_entry; |
| 877 | int ret; | ||
| 889 | 878 | ||
| 890 | if (!newinfo || !info) | 879 | if (!newinfo || !info) |
| 891 | return -EINVAL; | 880 | return -EINVAL; |
| @@ -894,13 +883,17 @@ static int compat_table_info(const struct xt_table_info *info, | |||
| 894 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 883 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
| 895 | newinfo->initial_entries = 0; | 884 | newinfo->initial_entries = 0; |
| 896 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 885 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
| 897 | return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size, | 886 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
| 898 | compat_calc_entry, info, loc_cpu_entry, | 887 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
| 899 | newinfo); | 888 | if (ret != 0) |
| 889 | return ret; | ||
| 890 | } | ||
| 891 | return 0; | ||
| 900 | } | 892 | } |
| 901 | #endif | 893 | #endif |
| 902 | 894 | ||
| 903 | static int get_info(struct net *net, void __user *user, int *len, int compat) | 895 | static int get_info(struct net *net, void __user *user, |
| 896 | const int *len, int compat) | ||
| 904 | { | 897 | { |
| 905 | char name[ARPT_TABLE_MAXNAMELEN]; | 898 | char name[ARPT_TABLE_MAXNAMELEN]; |
| 906 | struct xt_table *t; | 899 | struct xt_table *t; |
| @@ -925,10 +918,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
| 925 | if (t && !IS_ERR(t)) { | 918 | if (t && !IS_ERR(t)) { |
| 926 | struct arpt_getinfo info; | 919 | struct arpt_getinfo info; |
| 927 | const struct xt_table_info *private = t->private; | 920 | const struct xt_table_info *private = t->private; |
| 928 | |||
| 929 | #ifdef CONFIG_COMPAT | 921 | #ifdef CONFIG_COMPAT |
| 922 | struct xt_table_info tmp; | ||
| 923 | |||
| 930 | if (compat) { | 924 | if (compat) { |
| 931 | struct xt_table_info tmp; | ||
| 932 | ret = compat_table_info(private, &tmp); | 925 | ret = compat_table_info(private, &tmp); |
| 933 | xt_compat_flush_offsets(NFPROTO_ARP); | 926 | xt_compat_flush_offsets(NFPROTO_ARP); |
| 934 | private = &tmp; | 927 | private = &tmp; |
| @@ -959,7 +952,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
| 959 | } | 952 | } |
| 960 | 953 | ||
| 961 | static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, | 954 | static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, |
| 962 | int *len) | 955 | const int *len) |
| 963 | { | 956 | { |
| 964 | int ret; | 957 | int ret; |
| 965 | struct arpt_get_entries get; | 958 | struct arpt_get_entries get; |
| @@ -1010,10 +1003,10 @@ static int __do_replace(struct net *net, const char *name, | |||
| 1010 | struct xt_table_info *oldinfo; | 1003 | struct xt_table_info *oldinfo; |
| 1011 | struct xt_counters *counters; | 1004 | struct xt_counters *counters; |
| 1012 | void *loc_cpu_old_entry; | 1005 | void *loc_cpu_old_entry; |
| 1006 | struct arpt_entry *iter; | ||
| 1013 | 1007 | ||
| 1014 | ret = 0; | 1008 | ret = 0; |
| 1015 | counters = vmalloc_node(num_counters * sizeof(struct xt_counters), | 1009 | counters = vmalloc(num_counters * sizeof(struct xt_counters)); |
| 1016 | numa_node_id()); | ||
| 1017 | if (!counters) { | 1010 | if (!counters) { |
| 1018 | ret = -ENOMEM; | 1011 | ret = -ENOMEM; |
| 1019 | goto out; | 1012 | goto out; |
| @@ -1053,8 +1046,8 @@ static int __do_replace(struct net *net, const char *name, | |||
| 1053 | 1046 | ||
| 1054 | /* Decrease module usage counts and free resource */ | 1047 | /* Decrease module usage counts and free resource */ |
| 1055 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1048 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
| 1056 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1049 | xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) |
| 1057 | NULL); | 1050 | cleanup_entry(iter); |
| 1058 | 1051 | ||
| 1059 | xt_free_table_info(oldinfo); | 1052 | xt_free_table_info(oldinfo); |
| 1060 | if (copy_to_user(counters_ptr, counters, | 1053 | if (copy_to_user(counters_ptr, counters, |
| @@ -1073,12 +1066,14 @@ static int __do_replace(struct net *net, const char *name, | |||
| 1073 | return ret; | 1066 | return ret; |
| 1074 | } | 1067 | } |
| 1075 | 1068 | ||
| 1076 | static int do_replace(struct net *net, void __user *user, unsigned int len) | 1069 | static int do_replace(struct net *net, const void __user *user, |
| 1070 | unsigned int len) | ||
| 1077 | { | 1071 | { |
| 1078 | int ret; | 1072 | int ret; |
| 1079 | struct arpt_replace tmp; | 1073 | struct arpt_replace tmp; |
| 1080 | struct xt_table_info *newinfo; | 1074 | struct xt_table_info *newinfo; |
| 1081 | void *loc_cpu_entry; | 1075 | void *loc_cpu_entry; |
| 1076 | struct arpt_entry *iter; | ||
| 1082 | 1077 | ||
| 1083 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1078 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
| 1084 | return -EFAULT; | 1079 | return -EFAULT; |
| @@ -1099,9 +1094,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1099 | goto free_newinfo; | 1094 | goto free_newinfo; |
| 1100 | } | 1095 | } |
| 1101 | 1096 | ||
| 1102 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1097 | ret = translate_table(newinfo, loc_cpu_entry, &tmp); |
| 1103 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, | ||
| 1104 | tmp.hook_entry, tmp.underflow); | ||
| 1105 | if (ret != 0) | 1098 | if (ret != 0) |
| 1106 | goto free_newinfo; | 1099 | goto free_newinfo; |
| 1107 | 1100 | ||
| @@ -1114,27 +1107,15 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1114 | return 0; | 1107 | return 0; |
| 1115 | 1108 | ||
| 1116 | free_newinfo_untrans: | 1109 | free_newinfo_untrans: |
| 1117 | ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1110 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
| 1111 | cleanup_entry(iter); | ||
| 1118 | free_newinfo: | 1112 | free_newinfo: |
| 1119 | xt_free_table_info(newinfo); | 1113 | xt_free_table_info(newinfo); |
| 1120 | return ret; | 1114 | return ret; |
| 1121 | } | 1115 | } |
| 1122 | 1116 | ||
| 1123 | /* We're lazy, and add to the first CPU; overflow works its fey magic | 1117 | static int do_add_counters(struct net *net, const void __user *user, |
| 1124 | * and everything is OK. */ | 1118 | unsigned int len, int compat) |
| 1125 | static int | ||
| 1126 | add_counter_to_entry(struct arpt_entry *e, | ||
| 1127 | const struct xt_counters addme[], | ||
| 1128 | unsigned int *i) | ||
| 1129 | { | ||
| 1130 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 1131 | |||
| 1132 | (*i)++; | ||
| 1133 | return 0; | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, | ||
| 1137 | int compat) | ||
| 1138 | { | 1119 | { |
| 1139 | unsigned int i, curcpu; | 1120 | unsigned int i, curcpu; |
| 1140 | struct xt_counters_info tmp; | 1121 | struct xt_counters_info tmp; |
| @@ -1147,6 +1128,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
| 1147 | const struct xt_table_info *private; | 1128 | const struct xt_table_info *private; |
| 1148 | int ret = 0; | 1129 | int ret = 0; |
| 1149 | void *loc_cpu_entry; | 1130 | void *loc_cpu_entry; |
| 1131 | struct arpt_entry *iter; | ||
| 1150 | #ifdef CONFIG_COMPAT | 1132 | #ifdef CONFIG_COMPAT |
| 1151 | struct compat_xt_counters_info compat_tmp; | 1133 | struct compat_xt_counters_info compat_tmp; |
| 1152 | 1134 | ||
| @@ -1177,7 +1159,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
| 1177 | if (len != size + num_counters * sizeof(struct xt_counters)) | 1159 | if (len != size + num_counters * sizeof(struct xt_counters)) |
| 1178 | return -EINVAL; | 1160 | return -EINVAL; |
| 1179 | 1161 | ||
| 1180 | paddc = vmalloc_node(len - size, numa_node_id()); | 1162 | paddc = vmalloc(len - size); |
| 1181 | if (!paddc) | 1163 | if (!paddc) |
| 1182 | return -ENOMEM; | 1164 | return -ENOMEM; |
| 1183 | 1165 | ||
| @@ -1204,11 +1186,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
| 1204 | curcpu = smp_processor_id(); | 1186 | curcpu = smp_processor_id(); |
| 1205 | loc_cpu_entry = private->entries[curcpu]; | 1187 | loc_cpu_entry = private->entries[curcpu]; |
| 1206 | xt_info_wrlock(curcpu); | 1188 | xt_info_wrlock(curcpu); |
| 1207 | ARPT_ENTRY_ITERATE(loc_cpu_entry, | 1189 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { |
| 1208 | private->size, | 1190 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); |
| 1209 | add_counter_to_entry, | 1191 | ++i; |
| 1210 | paddc, | 1192 | } |
| 1211 | &i); | ||
| 1212 | xt_info_wrunlock(curcpu); | 1193 | xt_info_wrunlock(curcpu); |
| 1213 | unlock_up_free: | 1194 | unlock_up_free: |
| 1214 | local_bh_enable(); | 1195 | local_bh_enable(); |
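do_add_counters() likewise loses its add_counter_to_entry() callback; the addition now happens inline. Holding xt_info_wrlock() on the current CPU excludes the per-CPU read side taken by arpt_do_table(), so each rule's bcnt/pcnt pair is updated atomically with respect to the packet path. The critical section, as restructured above (paddc is the user-supplied counter array):

    curcpu = smp_processor_id();
    loc_cpu_entry = private->entries[curcpu];
    xt_info_wrlock(curcpu);
    xt_entry_foreach(iter, loc_cpu_entry, private->size) {
            ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
            ++i;
    }
    xt_info_wrunlock(curcpu);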
| @@ -1221,28 +1202,22 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
| 1221 | } | 1202 | } |
| 1222 | 1203 | ||
| 1223 | #ifdef CONFIG_COMPAT | 1204 | #ifdef CONFIG_COMPAT |
| 1224 | static inline int | 1205 | static inline void compat_release_entry(struct compat_arpt_entry *e) |
| 1225 | compat_release_entry(struct compat_arpt_entry *e, unsigned int *i) | ||
| 1226 | { | 1206 | { |
| 1227 | struct arpt_entry_target *t; | 1207 | struct arpt_entry_target *t; |
| 1228 | 1208 | ||
| 1229 | if (i && (*i)-- == 0) | ||
| 1230 | return 1; | ||
| 1231 | |||
| 1232 | t = compat_arpt_get_target(e); | 1209 | t = compat_arpt_get_target(e); |
| 1233 | module_put(t->u.kernel.target->me); | 1210 | module_put(t->u.kernel.target->me); |
| 1234 | return 0; | ||
| 1235 | } | 1211 | } |
| 1236 | 1212 | ||
| 1237 | static inline int | 1213 | static inline int |
| 1238 | check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | 1214 | check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, |
| 1239 | struct xt_table_info *newinfo, | 1215 | struct xt_table_info *newinfo, |
| 1240 | unsigned int *size, | 1216 | unsigned int *size, |
| 1241 | unsigned char *base, | 1217 | const unsigned char *base, |
| 1242 | unsigned char *limit, | 1218 | const unsigned char *limit, |
| 1243 | unsigned int *hook_entries, | 1219 | const unsigned int *hook_entries, |
| 1244 | unsigned int *underflows, | 1220 | const unsigned int *underflows, |
| 1245 | unsigned int *i, | ||
| 1246 | const char *name) | 1221 | const char *name) |
| 1247 | { | 1222 | { |
| 1248 | struct arpt_entry_target *t; | 1223 | struct arpt_entry_target *t; |
| @@ -1273,14 +1248,12 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | |||
| 1273 | entry_offset = (void *)e - (void *)base; | 1248 | entry_offset = (void *)e - (void *)base; |
| 1274 | 1249 | ||
| 1275 | t = compat_arpt_get_target(e); | 1250 | t = compat_arpt_get_target(e); |
| 1276 | target = try_then_request_module(xt_find_target(NFPROTO_ARP, | 1251 | target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, |
| 1277 | t->u.user.name, | 1252 | t->u.user.revision); |
| 1278 | t->u.user.revision), | 1253 | if (IS_ERR(target)) { |
| 1279 | "arpt_%s", t->u.user.name); | ||
| 1280 | if (IS_ERR(target) || !target) { | ||
| 1281 | duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", | 1254 | duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", |
| 1282 | t->u.user.name); | 1255 | t->u.user.name); |
| 1283 | ret = target ? PTR_ERR(target) : -ENOENT; | 1256 | ret = PTR_ERR(target); |
| 1284 | goto out; | 1257 | goto out; |
| 1285 | } | 1258 | } |
| 1286 | t->u.kernel.target = target; | 1259 | t->u.kernel.target = target; |
| @@ -1302,8 +1275,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | |||
| 1302 | /* Clear counters and comefrom */ | 1275 | /* Clear counters and comefrom */ |
| 1303 | memset(&e->counters, 0, sizeof(e->counters)); | 1276 | memset(&e->counters, 0, sizeof(e->counters)); |
| 1304 | e->comefrom = 0; | 1277 | e->comefrom = 0; |
| 1305 | |||
| 1306 | (*i)++; | ||
| 1307 | return 0; | 1278 | return 0; |
| 1308 | 1279 | ||
| 1309 | release_target: | 1280 | release_target: |
| @@ -1347,19 +1318,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, | |||
| 1347 | return ret; | 1318 | return ret; |
| 1348 | } | 1319 | } |
| 1349 | 1320 | ||
| 1350 | static inline int compat_check_entry(struct arpt_entry *e, const char *name, | ||
| 1351 | unsigned int *i) | ||
| 1352 | { | ||
| 1353 | int ret; | ||
| 1354 | |||
| 1355 | ret = check_target(e, name); | ||
| 1356 | if (ret) | ||
| 1357 | return ret; | ||
| 1358 | |||
| 1359 | (*i)++; | ||
| 1360 | return 0; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | static int translate_compat_table(const char *name, | 1321 | static int translate_compat_table(const char *name, |
| 1364 | unsigned int valid_hooks, | 1322 | unsigned int valid_hooks, |
| 1365 | struct xt_table_info **pinfo, | 1323 | struct xt_table_info **pinfo, |
| @@ -1372,8 +1330,10 @@ static int translate_compat_table(const char *name, | |||
| 1372 | unsigned int i, j; | 1330 | unsigned int i, j; |
| 1373 | struct xt_table_info *newinfo, *info; | 1331 | struct xt_table_info *newinfo, *info; |
| 1374 | void *pos, *entry0, *entry1; | 1332 | void *pos, *entry0, *entry1; |
| 1333 | struct compat_arpt_entry *iter0; | ||
| 1334 | struct arpt_entry *iter1; | ||
| 1375 | unsigned int size; | 1335 | unsigned int size; |
| 1376 | int ret; | 1336 | int ret = 0; |
| 1377 | 1337 | ||
| 1378 | info = *pinfo; | 1338 | info = *pinfo; |
| 1379 | entry0 = *pentry0; | 1339 | entry0 = *pentry0; |
| @@ -1390,13 +1350,17 @@ static int translate_compat_table(const char *name, | |||
| 1390 | j = 0; | 1350 | j = 0; |
| 1391 | xt_compat_lock(NFPROTO_ARP); | 1351 | xt_compat_lock(NFPROTO_ARP); |
| 1392 | /* Walk through entries, checking offsets. */ | 1352 | /* Walk through entries, checking offsets. */ |
| 1393 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, | 1353 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1394 | check_compat_entry_size_and_hooks, | 1354 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
| 1395 | info, &size, entry0, | 1355 | entry0, |
| 1396 | entry0 + total_size, | 1356 | entry0 + total_size, |
| 1397 | hook_entries, underflows, &j, name); | 1357 | hook_entries, |
| 1398 | if (ret != 0) | 1358 | underflows, |
| 1399 | goto out_unlock; | 1359 | name); |
| 1360 | if (ret != 0) | ||
| 1361 | goto out_unlock; | ||
| 1362 | ++j; | ||
| 1363 | } | ||
| 1400 | 1364 | ||
| 1401 | ret = -EINVAL; | 1365 | ret = -EINVAL; |
| 1402 | if (j != number) { | 1366 | if (j != number) { |
| @@ -1435,9 +1399,12 @@ static int translate_compat_table(const char *name, | |||
| 1435 | entry1 = newinfo->entries[raw_smp_processor_id()]; | 1399 | entry1 = newinfo->entries[raw_smp_processor_id()]; |
| 1436 | pos = entry1; | 1400 | pos = entry1; |
| 1437 | size = total_size; | 1401 | size = total_size; |
| 1438 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, | 1402 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1439 | compat_copy_entry_from_user, | 1403 | ret = compat_copy_entry_from_user(iter0, &pos, &size, |
| 1440 | &pos, &size, name, newinfo, entry1); | 1404 | name, newinfo, entry1); |
| 1405 | if (ret != 0) | ||
| 1406 | break; | ||
| 1407 | } | ||
| 1441 | xt_compat_flush_offsets(NFPROTO_ARP); | 1408 | xt_compat_flush_offsets(NFPROTO_ARP); |
| 1442 | xt_compat_unlock(NFPROTO_ARP); | 1409 | xt_compat_unlock(NFPROTO_ARP); |
| 1443 | if (ret) | 1410 | if (ret) |
| @@ -1448,13 +1415,35 @@ static int translate_compat_table(const char *name, | |||
| 1448 | goto free_newinfo; | 1415 | goto free_newinfo; |
| 1449 | 1416 | ||
| 1450 | i = 0; | 1417 | i = 0; |
| 1451 | ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, | 1418 | xt_entry_foreach(iter1, entry1, newinfo->size) { |
| 1452 | name, &i); | 1419 | ret = check_target(iter1, name); |
| 1420 | if (ret != 0) | ||
| 1421 | break; | ||
| 1422 | ++i; | ||
| 1423 | if (strcmp(arpt_get_target(iter1)->u.user.name, | ||
| 1424 | XT_ERROR_TARGET) == 0) | ||
| 1425 | ++newinfo->stacksize; | ||
| 1426 | } | ||
| 1453 | if (ret) { | 1427 | if (ret) { |
| 1428 | /* | ||
| 1429 | * The first i matches need cleanup_entry (calls ->destroy) | ||
| 1430 | * because they had called ->check already. The other j-i | ||
| 1431 | * entries need only release. | ||
| 1432 | */ | ||
| 1433 | int skip = i; | ||
| 1454 | j -= i; | 1434 | j -= i; |
| 1455 | COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, | 1435 | xt_entry_foreach(iter0, entry0, newinfo->size) { |
| 1456 | compat_release_entry, &j); | 1436 | if (skip-- > 0) |
| 1457 | ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); | 1437 | continue; |
| 1438 | if (j-- == 0) | ||
| 1439 | break; | ||
| 1440 | compat_release_entry(iter0); | ||
| 1441 | } | ||
| 1442 | xt_entry_foreach(iter1, entry1, newinfo->size) { | ||
| 1443 | if (i-- == 0) | ||
| 1444 | break; | ||
| 1445 | cleanup_entry(iter1); | ||
| 1446 | } | ||
| 1458 | xt_free_table_info(newinfo); | 1447 | xt_free_table_info(newinfo); |
| 1459 | return ret; | 1448 | return ret; |
| 1460 | } | 1449 | } |
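The error path in translate_compat_table() is subtle enough that the rewrite documents it: of the j compat entries that passed check_compat_entry_size_and_hooks(), the first i were also translated and fully checked (their targets' ->check routines ran), so they must be torn down through cleanup_entry() on the translated copy in entry1; the remaining j-i only hold a module reference and are released via compat_release_entry() on the original entry0. Schematically (indices as in the hunk above):

    /* on failure: entry0 holds j module references, of which the
     * first i correspond to fully-initialised entries in entry1 */
    int skip = i;
    j -= i;
    xt_entry_foreach(iter0, entry0, newinfo->size) {
            if (skip-- > 0)                 /* handled via entry1 below */
                    continue;
            if (j-- == 0)
                    break;
            compat_release_entry(iter0);    /* module_put() only */
    }
    xt_entry_foreach(iter1, entry1, newinfo->size) {
            if (i-- == 0)
                    break;
            cleanup_entry(iter1);           /* ->destroy + module_put() */
    }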
| @@ -1472,7 +1461,11 @@ static int translate_compat_table(const char *name, | |||
| 1472 | free_newinfo: | 1461 | free_newinfo: |
| 1473 | xt_free_table_info(newinfo); | 1462 | xt_free_table_info(newinfo); |
| 1474 | out: | 1463 | out: |
| 1475 | COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); | 1464 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1465 | if (j-- == 0) | ||
| 1466 | break; | ||
| 1467 | compat_release_entry(iter0); | ||
| 1468 | } | ||
| 1476 | return ret; | 1469 | return ret; |
| 1477 | out_unlock: | 1470 | out_unlock: |
| 1478 | xt_compat_flush_offsets(NFPROTO_ARP); | 1471 | xt_compat_flush_offsets(NFPROTO_ARP); |
| @@ -1499,6 +1492,7 @@ static int compat_do_replace(struct net *net, void __user *user, | |||
| 1499 | struct compat_arpt_replace tmp; | 1492 | struct compat_arpt_replace tmp; |
| 1500 | struct xt_table_info *newinfo; | 1493 | struct xt_table_info *newinfo; |
| 1501 | void *loc_cpu_entry; | 1494 | void *loc_cpu_entry; |
| 1495 | struct arpt_entry *iter; | ||
| 1502 | 1496 | ||
| 1503 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1497 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
| 1504 | return -EFAULT; | 1498 | return -EFAULT; |
| @@ -1536,7 +1530,8 @@ static int compat_do_replace(struct net *net, void __user *user, | |||
| 1536 | return 0; | 1530 | return 0; |
| 1537 | 1531 | ||
| 1538 | free_newinfo_untrans: | 1532 | free_newinfo_untrans: |
| 1539 | ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1533 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
| 1534 | cleanup_entry(iter); | ||
| 1540 | free_newinfo: | 1535 | free_newinfo: |
| 1541 | xt_free_table_info(newinfo); | 1536 | xt_free_table_info(newinfo); |
| 1542 | return ret; | 1537 | return ret; |
| @@ -1570,7 +1565,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, | |||
| 1570 | static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, | 1565 | static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, |
| 1571 | compat_uint_t *size, | 1566 | compat_uint_t *size, |
| 1572 | struct xt_counters *counters, | 1567 | struct xt_counters *counters, |
| 1573 | unsigned int *i) | 1568 | unsigned int i) |
| 1574 | { | 1569 | { |
| 1575 | struct arpt_entry_target *t; | 1570 | struct arpt_entry_target *t; |
| 1576 | struct compat_arpt_entry __user *ce; | 1571 | struct compat_arpt_entry __user *ce; |
| @@ -1578,14 +1573,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, | |||
| 1578 | compat_uint_t origsize; | 1573 | compat_uint_t origsize; |
| 1579 | int ret; | 1574 | int ret; |
| 1580 | 1575 | ||
| 1581 | ret = -EFAULT; | ||
| 1582 | origsize = *size; | 1576 | origsize = *size; |
| 1583 | ce = (struct compat_arpt_entry __user *)*dstptr; | 1577 | ce = (struct compat_arpt_entry __user *)*dstptr; |
| 1584 | if (copy_to_user(ce, e, sizeof(struct arpt_entry))) | 1578 | if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || |
| 1585 | goto out; | 1579 | copy_to_user(&ce->counters, &counters[i], |
| 1586 | 1580 | sizeof(counters[i])) != 0) | |
| 1587 | if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) | 1581 | return -EFAULT; |
| 1588 | goto out; | ||
| 1589 | 1582 | ||
| 1590 | *dstptr += sizeof(struct compat_arpt_entry); | 1583 | *dstptr += sizeof(struct compat_arpt_entry); |
| 1591 | *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); | 1584 | *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); |
| @@ -1595,18 +1588,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, | |||
| 1595 | t = arpt_get_target(e); | 1588 | t = arpt_get_target(e); |
| 1596 | ret = xt_compat_target_to_user(t, dstptr, size); | 1589 | ret = xt_compat_target_to_user(t, dstptr, size); |
| 1597 | if (ret) | 1590 | if (ret) |
| 1598 | goto out; | 1591 | return ret; |
| 1599 | ret = -EFAULT; | ||
| 1600 | next_offset = e->next_offset - (origsize - *size); | 1592 | next_offset = e->next_offset - (origsize - *size); |
| 1601 | if (put_user(target_offset, &ce->target_offset)) | 1593 | if (put_user(target_offset, &ce->target_offset) != 0 || |
| 1602 | goto out; | 1594 | put_user(next_offset, &ce->next_offset) != 0) |
| 1603 | if (put_user(next_offset, &ce->next_offset)) | 1595 | return -EFAULT; |
| 1604 | goto out; | ||
| 1605 | |||
| 1606 | (*i)++; | ||
| 1607 | return 0; | 1596 | return 0; |
| 1608 | out: | ||
| 1609 | return ret; | ||
| 1610 | } | 1597 | } |
| 1611 | 1598 | ||
| 1612 | static int compat_copy_entries_to_user(unsigned int total_size, | 1599 | static int compat_copy_entries_to_user(unsigned int total_size, |
| @@ -1620,6 +1607,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, | |||
| 1620 | int ret = 0; | 1607 | int ret = 0; |
| 1621 | void *loc_cpu_entry; | 1608 | void *loc_cpu_entry; |
| 1622 | unsigned int i = 0; | 1609 | unsigned int i = 0; |
| 1610 | struct arpt_entry *iter; | ||
| 1623 | 1611 | ||
| 1624 | counters = alloc_counters(table); | 1612 | counters = alloc_counters(table); |
| 1625 | if (IS_ERR(counters)) | 1613 | if (IS_ERR(counters)) |
| @@ -1629,9 +1617,12 @@ static int compat_copy_entries_to_user(unsigned int total_size, | |||
| 1629 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1617 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
| 1630 | pos = userptr; | 1618 | pos = userptr; |
| 1631 | size = total_size; | 1619 | size = total_size; |
| 1632 | ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size, | 1620 | xt_entry_foreach(iter, loc_cpu_entry, total_size) { |
| 1633 | compat_copy_entry_to_user, | 1621 | ret = compat_copy_entry_to_user(iter, &pos, |
| 1634 | &pos, &size, counters, &i); | 1622 | &size, counters, i++); |
| 1623 | if (ret != 0) | ||
| 1624 | break; | ||
| 1625 | } | ||
| 1635 | vfree(counters); | 1626 | vfree(counters); |
| 1636 | return ret; | 1627 | return ret; |
| 1637 | } | 1628 | } |
| @@ -1784,8 +1775,7 @@ struct xt_table *arpt_register_table(struct net *net, | |||
| 1784 | { | 1775 | { |
| 1785 | int ret; | 1776 | int ret; |
| 1786 | struct xt_table_info *newinfo; | 1777 | struct xt_table_info *newinfo; |
| 1787 | struct xt_table_info bootstrap | 1778 | struct xt_table_info bootstrap = {0}; |
| 1788 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | ||
| 1789 | void *loc_cpu_entry; | 1779 | void *loc_cpu_entry; |
| 1790 | struct xt_table *new_table; | 1780 | struct xt_table *new_table; |
| 1791 | 1781 | ||
| @@ -1799,12 +1789,7 @@ struct xt_table *arpt_register_table(struct net *net, | |||
| 1799 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | 1789 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; |
| 1800 | memcpy(loc_cpu_entry, repl->entries, repl->size); | 1790 | memcpy(loc_cpu_entry, repl->entries, repl->size); |
| 1801 | 1791 | ||
| 1802 | ret = translate_table(table->name, table->valid_hooks, | 1792 | ret = translate_table(newinfo, loc_cpu_entry, repl); |
| 1803 | newinfo, loc_cpu_entry, repl->size, | ||
| 1804 | repl->num_entries, | ||
| 1805 | repl->hook_entry, | ||
| 1806 | repl->underflow); | ||
| 1807 | |||
| 1808 | duprintf("arpt_register_table: translate table gives %d\n", ret); | 1793 | duprintf("arpt_register_table: translate table gives %d\n", ret); |
| 1809 | if (ret != 0) | 1794 | if (ret != 0) |
| 1810 | goto out_free; | 1795 | goto out_free; |
| @@ -1827,35 +1812,37 @@ void arpt_unregister_table(struct xt_table *table) | |||
| 1827 | struct xt_table_info *private; | 1812 | struct xt_table_info *private; |
| 1828 | void *loc_cpu_entry; | 1813 | void *loc_cpu_entry; |
| 1829 | struct module *table_owner = table->me; | 1814 | struct module *table_owner = table->me; |
| 1815 | struct arpt_entry *iter; | ||
| 1830 | 1816 | ||
| 1831 | private = xt_unregister_table(table); | 1817 | private = xt_unregister_table(table); |
| 1832 | 1818 | ||
| 1833 | /* Decrease module usage counts and free resources */ | 1819 | /* Decrease module usage counts and free resources */ |
| 1834 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1820 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
| 1835 | ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, | 1821 | xt_entry_foreach(iter, loc_cpu_entry, private->size) |
| 1836 | cleanup_entry, NULL); | 1822 | cleanup_entry(iter); |
| 1837 | if (private->number > private->initial_entries) | 1823 | if (private->number > private->initial_entries) |
| 1838 | module_put(table_owner); | 1824 | module_put(table_owner); |
| 1839 | xt_free_table_info(private); | 1825 | xt_free_table_info(private); |
| 1840 | } | 1826 | } |
| 1841 | 1827 | ||
| 1842 | /* The built-in targets: standard (NULL) and error. */ | 1828 | /* The built-in targets: standard (NULL) and error. */ |
| 1843 | static struct xt_target arpt_standard_target __read_mostly = { | 1829 | static struct xt_target arpt_builtin_tg[] __read_mostly = { |
| 1844 | .name = ARPT_STANDARD_TARGET, | 1830 | { |
| 1845 | .targetsize = sizeof(int), | 1831 | .name = ARPT_STANDARD_TARGET, |
| 1846 | .family = NFPROTO_ARP, | 1832 | .targetsize = sizeof(int), |
| 1833 | .family = NFPROTO_ARP, | ||
| 1847 | #ifdef CONFIG_COMPAT | 1834 | #ifdef CONFIG_COMPAT |
| 1848 | .compatsize = sizeof(compat_int_t), | 1835 | .compatsize = sizeof(compat_int_t), |
| 1849 | .compat_from_user = compat_standard_from_user, | 1836 | .compat_from_user = compat_standard_from_user, |
| 1850 | .compat_to_user = compat_standard_to_user, | 1837 | .compat_to_user = compat_standard_to_user, |
| 1851 | #endif | 1838 | #endif |
| 1852 | }; | 1839 | }, |
| 1853 | 1840 | { | |
| 1854 | static struct xt_target arpt_error_target __read_mostly = { | 1841 | .name = ARPT_ERROR_TARGET, |
| 1855 | .name = ARPT_ERROR_TARGET, | 1842 | .target = arpt_error, |
| 1856 | .target = arpt_error, | 1843 | .targetsize = ARPT_FUNCTION_MAXNAMELEN, |
| 1857 | .targetsize = ARPT_FUNCTION_MAXNAMELEN, | 1844 | .family = NFPROTO_ARP, |
| 1858 | .family = NFPROTO_ARP, | 1845 | }, |
| 1859 | }; | 1846 | }; |
| 1860 | 1847 | ||
| 1861 | static struct nf_sockopt_ops arpt_sockopts = { | 1848 | static struct nf_sockopt_ops arpt_sockopts = { |
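Merging the standard and error targets into the arpt_builtin_tg[] array lets init and exit use a single xt_register_targets()/xt_unregister_targets() pair, dropping the err3 label from the init path below. xt_register_targets() is expected to roll back any partial registrations itself, which is what makes the single error label sufficient:

    ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
    if (ret < 0)
            goto err2;      /* nothing left half-registered */
    /* ... and on the error/exit paths: */
    xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));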
| @@ -1899,12 +1886,9 @@ static int __init arp_tables_init(void) | |||
| 1899 | goto err1; | 1886 | goto err1; |
| 1900 | 1887 | ||
| 1901 | /* Noone else will be downing sem now, so we won't sleep */ | 1888 | /* Noone else will be downing sem now, so we won't sleep */ |
| 1902 | ret = xt_register_target(&arpt_standard_target); | 1889 | ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); |
| 1903 | if (ret < 0) | 1890 | if (ret < 0) |
| 1904 | goto err2; | 1891 | goto err2; |
| 1905 | ret = xt_register_target(&arpt_error_target); | ||
| 1906 | if (ret < 0) | ||
| 1907 | goto err3; | ||
| 1908 | 1892 | ||
| 1909 | /* Register setsockopt */ | 1893 | /* Register setsockopt */ |
| 1910 | ret = nf_register_sockopt(&arpt_sockopts); | 1894 | ret = nf_register_sockopt(&arpt_sockopts); |
| @@ -1915,9 +1899,7 @@ static int __init arp_tables_init(void) | |||
| 1915 | return 0; | 1899 | return 0; |
| 1916 | 1900 | ||
| 1917 | err4: | 1901 | err4: |
| 1918 | xt_unregister_target(&arpt_error_target); | 1902 | xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); |
| 1919 | err3: | ||
| 1920 | xt_unregister_target(&arpt_standard_target); | ||
| 1921 | err2: | 1903 | err2: |
| 1922 | unregister_pernet_subsys(&arp_tables_net_ops); | 1904 | unregister_pernet_subsys(&arp_tables_net_ops); |
| 1923 | err1: | 1905 | err1: |
| @@ -1927,8 +1909,7 @@ err1: | |||
| 1927 | static void __exit arp_tables_fini(void) | 1909 | static void __exit arp_tables_fini(void) |
| 1928 | { | 1910 | { |
| 1929 | nf_unregister_sockopt(&arpt_sockopts); | 1911 | nf_unregister_sockopt(&arpt_sockopts); |
| 1930 | xt_unregister_target(&arpt_error_target); | 1912 | xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); |
| 1931 | xt_unregister_target(&arpt_standard_target); | ||
| 1932 | unregister_pernet_subsys(&arp_tables_net_ops); | 1913 | unregister_pernet_subsys(&arp_tables_net_ops); |
| 1933 | } | 1914 | } |
| 1934 | 1915 | ||
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b0d5b1d0a769..e1be7dd1171b 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c | |||
| @@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); | |||
| 9 | MODULE_DESCRIPTION("arptables arp payload mangle target"); | 9 | MODULE_DESCRIPTION("arptables arp payload mangle target"); |
| 10 | 10 | ||
| 11 | static unsigned int | 11 | static unsigned int |
| 12 | target(struct sk_buff *skb, const struct xt_target_param *par) | 12 | target(struct sk_buff *skb, const struct xt_action_param *par) |
| 13 | { | 13 | { |
| 14 | const struct arpt_mangle *mangle = par->targinfo; | 14 | const struct arpt_mangle *mangle = par->targinfo; |
| 15 | const struct arphdr *arp; | 15 | const struct arphdr *arp; |
| @@ -54,7 +54,7 @@ target(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 54 | return mangle->target; | 54 | return mangle->target; |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | static bool checkentry(const struct xt_tgchk_param *par) | 57 | static int checkentry(const struct xt_tgchk_param *par) |
| 58 | { | 58 | { |
| 59 | const struct arpt_mangle *mangle = par->targinfo; | 59 | const struct arpt_mangle *mangle = par->targinfo; |
| 60 | 60 | ||
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 97337601827a..79ca5e70d497 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
| @@ -6,7 +6,9 @@ | |||
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 9 | #include <linux/netfilter/x_tables.h> | ||
| 9 | #include <linux/netfilter_arp/arp_tables.h> | 10 | #include <linux/netfilter_arp/arp_tables.h> |
| 11 | #include <linux/slab.h> | ||
| 10 | 12 | ||
| 11 | MODULE_LICENSE("GPL"); | 13 | MODULE_LICENSE("GPL"); |
| 12 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); | 14 | MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); |
| @@ -15,93 +17,37 @@ MODULE_DESCRIPTION("arptables filter table"); | |||
| 15 | #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ | 17 | #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ |
| 16 | (1 << NF_ARP_FORWARD)) | 18 | (1 << NF_ARP_FORWARD)) |
| 17 | 19 | ||
| 18 | static const struct | ||
| 19 | { | ||
| 20 | struct arpt_replace repl; | ||
| 21 | struct arpt_standard entries[3]; | ||
| 22 | struct arpt_error term; | ||
| 23 | } initial_table __net_initdata = { | ||
| 24 | .repl = { | ||
| 25 | .name = "filter", | ||
| 26 | .valid_hooks = FILTER_VALID_HOOKS, | ||
| 27 | .num_entries = 4, | ||
| 28 | .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), | ||
| 29 | .hook_entry = { | ||
| 30 | [NF_ARP_IN] = 0, | ||
| 31 | [NF_ARP_OUT] = sizeof(struct arpt_standard), | ||
| 32 | [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), | ||
| 33 | }, | ||
| 34 | .underflow = { | ||
| 35 | [NF_ARP_IN] = 0, | ||
| 36 | [NF_ARP_OUT] = sizeof(struct arpt_standard), | ||
| 37 | [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), | ||
| 38 | }, | ||
| 39 | }, | ||
| 40 | .entries = { | ||
| 41 | ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ | ||
| 42 | ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ | ||
| 43 | ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ | ||
| 44 | }, | ||
| 45 | .term = ARPT_ERROR_INIT, | ||
| 46 | }; | ||
| 47 | |||
| 48 | static const struct xt_table packet_filter = { | 20 | static const struct xt_table packet_filter = { |
| 49 | .name = "filter", | 21 | .name = "filter", |
| 50 | .valid_hooks = FILTER_VALID_HOOKS, | 22 | .valid_hooks = FILTER_VALID_HOOKS, |
| 51 | .me = THIS_MODULE, | 23 | .me = THIS_MODULE, |
| 52 | .af = NFPROTO_ARP, | 24 | .af = NFPROTO_ARP, |
| 25 | .priority = NF_IP_PRI_FILTER, | ||
| 53 | }; | 26 | }; |
| 54 | 27 | ||
| 55 | /* The work comes in here from netfilter.c */ | 28 | /* The work comes in here from netfilter.c */ |
| 56 | static unsigned int arpt_in_hook(unsigned int hook, | 29 | static unsigned int |
| 57 | struct sk_buff *skb, | 30 | arptable_filter_hook(unsigned int hook, struct sk_buff *skb, |
| 58 | const struct net_device *in, | 31 | const struct net_device *in, const struct net_device *out, |
| 59 | const struct net_device *out, | 32 | int (*okfn)(struct sk_buff *)) |
| 60 | int (*okfn)(struct sk_buff *)) | ||
| 61 | { | 33 | { |
| 62 | return arpt_do_table(skb, hook, in, out, | 34 | const struct net *net = dev_net((in != NULL) ? in : out); |
| 63 | dev_net(in)->ipv4.arptable_filter); | ||
| 64 | } | ||
| 65 | 35 | ||
| 66 | static unsigned int arpt_out_hook(unsigned int hook, | 36 | return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); |
| 67 | struct sk_buff *skb, | ||
| 68 | const struct net_device *in, | ||
| 69 | const struct net_device *out, | ||
| 70 | int (*okfn)(struct sk_buff *)) | ||
| 71 | { | ||
| 72 | return arpt_do_table(skb, hook, in, out, | ||
| 73 | dev_net(out)->ipv4.arptable_filter); | ||
| 74 | } | 37 | } |
| 75 | 38 | ||
| 76 | static struct nf_hook_ops arpt_ops[] __read_mostly = { | 39 | static struct nf_hook_ops *arpfilter_ops __read_mostly; |
| 77 | { | ||
| 78 | .hook = arpt_in_hook, | ||
| 79 | .owner = THIS_MODULE, | ||
| 80 | .pf = NFPROTO_ARP, | ||
| 81 | .hooknum = NF_ARP_IN, | ||
| 82 | .priority = NF_IP_PRI_FILTER, | ||
| 83 | }, | ||
| 84 | { | ||
| 85 | .hook = arpt_out_hook, | ||
| 86 | .owner = THIS_MODULE, | ||
| 87 | .pf = NFPROTO_ARP, | ||
| 88 | .hooknum = NF_ARP_OUT, | ||
| 89 | .priority = NF_IP_PRI_FILTER, | ||
| 90 | }, | ||
| 91 | { | ||
| 92 | .hook = arpt_in_hook, | ||
| 93 | .owner = THIS_MODULE, | ||
| 94 | .pf = NFPROTO_ARP, | ||
| 95 | .hooknum = NF_ARP_FORWARD, | ||
| 96 | .priority = NF_IP_PRI_FILTER, | ||
| 97 | }, | ||
| 98 | }; | ||
| 99 | 40 | ||
| 100 | static int __net_init arptable_filter_net_init(struct net *net) | 41 | static int __net_init arptable_filter_net_init(struct net *net) |
| 101 | { | 42 | { |
| 102 | /* Register table */ | 43 | struct arpt_replace *repl; |
| 44 | |||
| 45 | repl = arpt_alloc_initial_table(&packet_filter); | ||
| 46 | if (repl == NULL) | ||
| 47 | return -ENOMEM; | ||
| 103 | net->ipv4.arptable_filter = | 48 | net->ipv4.arptable_filter = |
| 104 | arpt_register_table(net, &packet_filter, &initial_table.repl); | 49 | arpt_register_table(net, &packet_filter, repl); |
| 50 | kfree(repl); | ||
| 105 | if (IS_ERR(net->ipv4.arptable_filter)) | 51 | if (IS_ERR(net->ipv4.arptable_filter)) |
| 106 | return PTR_ERR(net->ipv4.arptable_filter); | 52 | return PTR_ERR(net->ipv4.arptable_filter); |
| 107 | return 0; | 53 | return 0; |
| @@ -125,9 +71,11 @@ static int __init arptable_filter_init(void) | |||
| 125 | if (ret < 0) | 71 | if (ret < 0) |
| 126 | return ret; | 72 | return ret; |
| 127 | 73 | ||
| 128 | ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); | 74 | arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); |
| 129 | if (ret < 0) | 75 | if (IS_ERR(arpfilter_ops)) { |
| 76 | ret = PTR_ERR(arpfilter_ops); | ||
| 130 | goto cleanup_table; | 77 | goto cleanup_table; |
| 78 | } | ||
| 131 | return ret; | 79 | return ret; |
| 132 | 80 | ||
| 133 | cleanup_table: | 81 | cleanup_table: |
| @@ -137,7 +85,7 @@ cleanup_table: | |||
| 137 | 85 | ||
| 138 | static void __exit arptable_filter_fini(void) | 86 | static void __exit arptable_filter_fini(void) |
| 139 | { | 87 | { |
| 140 | nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); | 88 | xt_hook_unlink(&packet_filter, arpfilter_ops); |
| 141 | unregister_pernet_subsys(&arptable_filter_net_ops); | 89 | unregister_pernet_subsys(&arptable_filter_net_ops); |
| 142 | } | 90 | } |
| 143 | 91 | ||
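The arptable_filter conversion above trades a hand-rolled initial ruleset and three nearly identical nf_hook_ops entries for two helpers: arpt_alloc_initial_table() builds the initial ruleset at runtime, and xt_hook_link() registers one hook per bit set in the table's valid_hooks mask, all sharing the single arptable_filter_hook() and the priority now carried in struct xt_table. A minimal user-space sketch of that per-bit registration (the NF_ARP_* hook set matches FILTER_VALID_HOOKS above, but the ops layout is a stand-in, not the kernel struct):

    #include <stdio.h>
    #include <stdlib.h>

    /* Model of xt_hook_link(): one hook registration per set bit in
     * valid_hooks, all sharing one handler and one priority, instead
     * of a hand-written nf_hook_ops array per hook. */

    enum { NF_ARP_IN, NF_ARP_OUT, NF_ARP_FORWARD, NF_ARP_NUMHOOKS };

    struct fake_hook_ops {
            unsigned int hooknum;
            const char *handler;
    };

    #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
                                (1 << NF_ARP_FORWARD))

    int main(void)
    {
            unsigned int valid_hooks = FILTER_VALID_HOOKS;
            unsigned int i, n = 0;
            struct fake_hook_ops *ops;

            for (i = 0; i < NF_ARP_NUMHOOKS; i++)   /* count set bits */
                    if (valid_hooks & (1 << i))
                            n++;
            ops = calloc(n, sizeof(*ops));
            if (ops == NULL)
                    return 1;

            n = 0;
            for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
                    if (!(valid_hooks & (1 << i)))
                            continue;
                    ops[n].hooknum = i;
                    ops[n].handler = "arptable_filter_hook";
                    printf("ops[%u]: hooknum=%u -> %s\n",
                           n, ops[n].hooknum, ops[n].handler);
                    n++;
            }
            free(ops);
            return 0;
    }

The payoff shows in the hook function itself: the separate arpt_in_hook()/arpt_out_hook() wrappers, which existed only to pick dev_net(in) versus dev_net(out), collapse into one function that uses whichever device is non-NULL.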
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 2855f1f38cbc..d2c1311cb28d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
| 27 | #include <linux/net.h> | 27 | #include <linux/net.h> |
| 28 | #include <linux/mutex.h> | 28 | #include <linux/mutex.h> |
| 29 | #include <linux/slab.h> | ||
| 29 | #include <net/net_namespace.h> | 30 | #include <net/net_namespace.h> |
| 30 | #include <net/sock.h> | 31 | #include <net/sock.h> |
| 31 | #include <net/route.h> | 32 | #include <net/route.h> |
| @@ -41,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | |||
| 41 | 42 | ||
| 42 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | 43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; |
| 43 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | 44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; |
| 44 | static DEFINE_RWLOCK(queue_lock); | 45 | static DEFINE_SPINLOCK(queue_lock); |
| 45 | static int peer_pid __read_mostly; | 46 | static int peer_pid __read_mostly; |
| 46 | static unsigned int copy_range __read_mostly; | 47 | static unsigned int copy_range __read_mostly; |
| 47 | static unsigned int queue_total; | 48 | static unsigned int queue_total; |
| @@ -71,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) | |||
| 71 | break; | 72 | break; |
| 72 | 73 | ||
| 73 | case IPQ_COPY_PACKET: | 74 | case IPQ_COPY_PACKET: |
| 74 | copy_mode = mode; | 75 | if (range > 0xFFFF) |
| 76 | range = 0xFFFF; | ||
| 75 | copy_range = range; | 77 | copy_range = range; |
| 76 | if (copy_range > 0xFFFF) | 78 | copy_mode = mode; |
| 77 | copy_range = 0xFFFF; | ||
| 78 | break; | 79 | break; |
| 79 | 80 | ||
| 80 | default: | 81 | default: |
| @@ -100,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id) | |||
| 100 | { | 101 | { |
| 101 | struct nf_queue_entry *entry = NULL, *i; | 102 | struct nf_queue_entry *entry = NULL, *i; |
| 102 | 103 | ||
| 103 | write_lock_bh(&queue_lock); | 104 | spin_lock_bh(&queue_lock); |
| 104 | 105 | ||
| 105 | list_for_each_entry(i, &queue_list, list) { | 106 | list_for_each_entry(i, &queue_list, list) { |
| 106 | if ((unsigned long)i == id) { | 107 | if ((unsigned long)i == id) { |
| @@ -114,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id) | |||
| 114 | queue_total--; | 115 | queue_total--; |
| 115 | } | 116 | } |
| 116 | 117 | ||
| 117 | write_unlock_bh(&queue_lock); | 118 | spin_unlock_bh(&queue_lock); |
| 118 | return entry; | 119 | return entry; |
| 119 | } | 120 | } |
| 120 | 121 | ||
| @@ -135,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | |||
| 135 | static void | 136 | static void |
| 136 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | 137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) |
| 137 | { | 138 | { |
| 138 | write_lock_bh(&queue_lock); | 139 | spin_lock_bh(&queue_lock); |
| 139 | __ipq_flush(cmpfn, data); | 140 | __ipq_flush(cmpfn, data); |
| 140 | write_unlock_bh(&queue_lock); | 141 | spin_unlock_bh(&queue_lock); |
| 141 | } | 142 | } |
| 142 | 143 | ||
| 143 | static struct sk_buff * | 144 | static struct sk_buff * |
| @@ -151,37 +152,29 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | |||
| 151 | struct nlmsghdr *nlh; | 152 | struct nlmsghdr *nlh; |
| 152 | struct timeval tv; | 153 | struct timeval tv; |
| 153 | 154 | ||
| 154 | read_lock_bh(&queue_lock); | 155 | switch (ACCESS_ONCE(copy_mode)) { |
| 155 | |||
| 156 | switch (copy_mode) { | ||
| 157 | case IPQ_COPY_META: | 156 | case IPQ_COPY_META: |
| 158 | case IPQ_COPY_NONE: | 157 | case IPQ_COPY_NONE: |
| 159 | size = NLMSG_SPACE(sizeof(*pmsg)); | 158 | size = NLMSG_SPACE(sizeof(*pmsg)); |
| 160 | break; | 159 | break; |
| 161 | 160 | ||
| 162 | case IPQ_COPY_PACKET: | 161 | case IPQ_COPY_PACKET: |
| 163 | if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || | 162 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && |
| 164 | entry->skb->ip_summed == CHECKSUM_COMPLETE) && | 163 | (*errp = skb_checksum_help(entry->skb))) |
| 165 | (*errp = skb_checksum_help(entry->skb))) { | ||
| 166 | read_unlock_bh(&queue_lock); | ||
| 167 | return NULL; | 164 | return NULL; |
| 168 | } | 165 | |
| 169 | if (copy_range == 0 || copy_range > entry->skb->len) | 166 | data_len = ACCESS_ONCE(copy_range); |
| 167 | if (data_len == 0 || data_len > entry->skb->len) | ||
| 170 | data_len = entry->skb->len; | 168 | data_len = entry->skb->len; |
| 171 | else | ||
| 172 | data_len = copy_range; | ||
| 173 | 169 | ||
| 174 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | 170 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); |
| 175 | break; | 171 | break; |
| 176 | 172 | ||
| 177 | default: | 173 | default: |
| 178 | *errp = -EINVAL; | 174 | *errp = -EINVAL; |
| 179 | read_unlock_bh(&queue_lock); | ||
| 180 | return NULL; | 175 | return NULL; |
| 181 | } | 176 | } |
| 182 | 177 | ||
| 183 | read_unlock_bh(&queue_lock); | ||
| 184 | |||
| 185 | skb = alloc_skb(size, GFP_ATOMIC); | 178 | skb = alloc_skb(size, GFP_ATOMIC); |
| 186 | if (!skb) | 179 | if (!skb) |
| 187 | goto nlmsg_failure; | 180 | goto nlmsg_failure; |
| @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | |||
| 242 | if (nskb == NULL) | 235 | if (nskb == NULL) |
| 243 | return status; | 236 | return status; |
| 244 | 237 | ||
| 245 | write_lock_bh(&queue_lock); | 238 | spin_lock_bh(&queue_lock); |
| 246 | 239 | ||
| 247 | if (!peer_pid) | 240 | if (!peer_pid) |
| 248 | goto err_out_free_nskb; | 241 | goto err_out_free_nskb; |
| @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | |||
| 266 | 259 | ||
| 267 | __ipq_enqueue_entry(entry); | 260 | __ipq_enqueue_entry(entry); |
| 268 | 261 | ||
| 269 | write_unlock_bh(&queue_lock); | 262 | spin_unlock_bh(&queue_lock); |
| 270 | return status; | 263 | return status; |
| 271 | 264 | ||
| 272 | err_out_free_nskb: | 265 | err_out_free_nskb: |
| 273 | kfree_skb(nskb); | 266 | kfree_skb(nskb); |
| 274 | 267 | ||
| 275 | err_out_unlock: | 268 | err_out_unlock: |
| 276 | write_unlock_bh(&queue_lock); | 269 | spin_unlock_bh(&queue_lock); |
| 277 | return status; | 270 | return status; |
| 278 | } | 271 | } |
| 279 | 272 | ||
| @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) | |||
| 342 | { | 335 | { |
| 343 | int status; | 336 | int status; |
| 344 | 337 | ||
| 345 | write_lock_bh(&queue_lock); | 338 | spin_lock_bh(&queue_lock); |
| 346 | status = __ipq_set_mode(mode, range); | 339 | status = __ipq_set_mode(mode, range); |
| 347 | write_unlock_bh(&queue_lock); | 340 | spin_unlock_bh(&queue_lock); |
| 348 | return status; | 341 | return status; |
| 349 | } | 342 | } |
| 350 | 343 | ||
| @@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
| 440 | if (security_netlink_recv(skb, CAP_NET_ADMIN)) | 433 | if (security_netlink_recv(skb, CAP_NET_ADMIN)) |
| 441 | RCV_SKB_FAIL(-EPERM); | 434 | RCV_SKB_FAIL(-EPERM); |
| 442 | 435 | ||
| 443 | write_lock_bh(&queue_lock); | 436 | spin_lock_bh(&queue_lock); |
| 444 | 437 | ||
| 445 | if (peer_pid) { | 438 | if (peer_pid) { |
| 446 | if (peer_pid != pid) { | 439 | if (peer_pid != pid) { |
| 447 | write_unlock_bh(&queue_lock); | 440 | spin_unlock_bh(&queue_lock); |
| 448 | RCV_SKB_FAIL(-EBUSY); | 441 | RCV_SKB_FAIL(-EBUSY); |
| 449 | } | 442 | } |
| 450 | } else { | 443 | } else { |
| @@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
| 452 | peer_pid = pid; | 445 | peer_pid = pid; |
| 453 | } | 446 | } |
| 454 | 447 | ||
| 455 | write_unlock_bh(&queue_lock); | 448 | spin_unlock_bh(&queue_lock); |
| 456 | 449 | ||
| 457 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | 450 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, |
| 458 | nlmsglen - NLMSG_LENGTH(0)); | 451 | nlmsglen - NLMSG_LENGTH(0)); |
| @@ -461,7 +454,6 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
| 461 | 454 | ||
| 462 | if (flags & NLM_F_ACK) | 455 | if (flags & NLM_F_ACK) |
| 463 | netlink_ack(skb, nlh, 0); | 456 | netlink_ack(skb, nlh, 0); |
| 464 | return; | ||
| 465 | } | 457 | } |
| 466 | 458 | ||
| 467 | static void | 459 | static void |
| @@ -498,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this, | |||
| 498 | struct netlink_notify *n = ptr; | 490 | struct netlink_notify *n = ptr; |
| 499 | 491 | ||
| 500 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { | 492 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { |
| 501 | write_lock_bh(&queue_lock); | 493 | spin_lock_bh(&queue_lock); |
| 502 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | 494 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) |
| 503 | __ipq_reset(); | 495 | __ipq_reset(); |
| 504 | write_unlock_bh(&queue_lock); | 496 | spin_unlock_bh(&queue_lock); |
| 505 | } | 497 | } |
| 506 | return NOTIFY_DONE; | 498 | return NOTIFY_DONE; |
| 507 | } | 499 | } |
| @@ -528,7 +520,7 @@ static ctl_table ipq_table[] = { | |||
| 528 | #ifdef CONFIG_PROC_FS | 520 | #ifdef CONFIG_PROC_FS |
| 529 | static int ip_queue_show(struct seq_file *m, void *v) | 521 | static int ip_queue_show(struct seq_file *m, void *v) |
| 530 | { | 522 | { |
| 531 | read_lock_bh(&queue_lock); | 523 | spin_lock_bh(&queue_lock); |
| 532 | 524 | ||
| 533 | seq_printf(m, | 525 | seq_printf(m, |
| 534 | "Peer PID : %d\n" | 526 | "Peer PID : %d\n" |
| @@ -546,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v) | |||
| 546 | queue_dropped, | 538 | queue_dropped, |
| 547 | queue_user_dropped); | 539 | queue_user_dropped); |
| 548 | 540 | ||
| 549 | read_unlock_bh(&queue_lock); | 541 | spin_unlock_bh(&queue_lock); |
| 550 | return 0; | 542 | return 0; |
| 551 | } | 543 | } |
| 552 | 544 | ||
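Two things make the rwlock-to-spinlock switch in ip_queue safe. The hot path, ipq_build_packet_message(), now reads copy_mode and copy_range exactly once each through ACCESS_ONCE() instead of taking a read lock, and __ipq_set_mode() is reordered so the clamped range is stored before the mode that makes it meaningful. A user-space sketch of that publish order, with C11 atomics standing in for the kernel's locked writer and ACCESS_ONCE() reader (sequentially consistent atomics are stronger than what the kernel code actually relies on):

    #include <stdatomic.h>
    #include <stdio.h>

    enum { IPQ_COPY_NONE, IPQ_COPY_META, IPQ_COPY_PACKET };

    static _Atomic unsigned char copy_mode = IPQ_COPY_NONE;
    static _Atomic unsigned int copy_range;

    static void set_mode(unsigned char mode, unsigned int range)
    {
            if (mode == IPQ_COPY_PACKET) {
                    if (range > 0xFFFF)
                            range = 0xFFFF;
                    atomic_store(&copy_range, range); /* range first... */
                    atomic_store(&copy_mode, mode);   /* ...mode last */
            }
    }

    /* lockless reader: each variable is loaded exactly once */
    static unsigned int reader(unsigned int pkt_len)
    {
            unsigned int data_len;

            if (atomic_load(&copy_mode) != IPQ_COPY_PACKET)
                    return 0;
            data_len = atomic_load(&copy_range);
            if (data_len == 0 || data_len > pkt_len)
                    data_len = pkt_len;
            return data_len;
    }

    int main(void)
    {
            set_mode(IPQ_COPY_PACKET, 0x20000); /* oversized, clamped */
            printf("1500-byte skb: copy %u bytes\n", reader(1500));
            printf("90000-byte skb: copy %u bytes\n", reader(90000));
            return 0;
    }

Loading each value only once is the point: a reader may still race a mode change, but it can never read copy_range twice and act on two different answers within one packet.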
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..d163f2e3b2e9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/netfilter/x_tables.h> | 28 | #include <linux/netfilter/x_tables.h> |
| 29 | #include <linux/netfilter_ipv4/ip_tables.h> | 29 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 30 | #include <net/netfilter/nf_log.h> | 30 | #include <net/netfilter/nf_log.h> |
| 31 | #include "../../netfilter/xt_repldata.h" | ||
| 31 | 32 | ||
| 32 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
| 33 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 34 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
| @@ -38,24 +39,19 @@ MODULE_DESCRIPTION("IPv4 packet filter"); | |||
| 38 | /*#define DEBUG_IP_FIREWALL_USER*/ | 39 | /*#define DEBUG_IP_FIREWALL_USER*/ |
| 39 | 40 | ||
| 40 | #ifdef DEBUG_IP_FIREWALL | 41 | #ifdef DEBUG_IP_FIREWALL |
| 41 | #define dprintf(format, args...) printk(format , ## args) | 42 | #define dprintf(format, args...) pr_info(format , ## args) |
| 42 | #else | 43 | #else |
| 43 | #define dprintf(format, args...) | 44 | #define dprintf(format, args...) |
| 44 | #endif | 45 | #endif |
| 45 | 46 | ||
| 46 | #ifdef DEBUG_IP_FIREWALL_USER | 47 | #ifdef DEBUG_IP_FIREWALL_USER |
| 47 | #define duprintf(format, args...) printk(format , ## args) | 48 | #define duprintf(format, args...) pr_info(format , ## args) |
| 48 | #else | 49 | #else |
| 49 | #define duprintf(format, args...) | 50 | #define duprintf(format, args...) |
| 50 | #endif | 51 | #endif |
| 51 | 52 | ||
| 52 | #ifdef CONFIG_NETFILTER_DEBUG | 53 | #ifdef CONFIG_NETFILTER_DEBUG |
| 53 | #define IP_NF_ASSERT(x) \ | 54 | #define IP_NF_ASSERT(x) WARN_ON(!(x)) |
| 54 | do { \ | ||
| 55 | if (!(x)) \ | ||
| 56 | printk("IP_NF_ASSERT: %s:%s:%u\n", \ | ||
| 57 | __func__, __FILE__, __LINE__); \ | ||
| 58 | } while(0) | ||
| 59 | #else | 55 | #else |
| 60 | #define IP_NF_ASSERT(x) | 56 | #define IP_NF_ASSERT(x) |
| 61 | #endif | 57 | #endif |
| @@ -66,6 +62,12 @@ do { \ | |||
| 66 | #define inline | 62 | #define inline |
| 67 | #endif | 63 | #endif |
| 68 | 64 | ||
| 65 | void *ipt_alloc_initial_table(const struct xt_table *info) | ||
| 66 | { | ||
| 67 | return xt_alloc_initial_table(ipt, IPT); | ||
| 68 | } | ||
| 69 | EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); | ||
| 70 | |||
| 69 | /* | 71 | /* |
| 70 | We keep a set of rules for each CPU, so we can avoid write-locking | 72 | We keep a set of rules for each CPU, so we can avoid write-locking |
| 71 | them in the softirq when updating the counters and therefore | 73 | them in the softirq when updating the counters and therefore |
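ipt_alloc_initial_table() (like the arptables variant earlier in this series) generates at runtime the same blob the deleted static initializers spelled out by hand: a replace header, one accept-everything standard entry per valid hook with hook_entry[i] == underflow[i] == i * sizeof(standard entry), and an error entry as terminator. A sketch of the layout arithmetic, with made-up sizes standing in for the real struct sizes:

    #include <stdio.h>

    #define SIZEOF_STANDARD 112 /* stand-in for sizeof(struct xxt_standard) */
    #define SIZEOF_ERROR    140 /* stand-in for sizeof(struct xxt_error) */

    int main(void)
    {
            const char *hooks[] = { "IN", "OUT", "FORWARD" };
            unsigned int nhooks = 3, i;

            printf("num_entries = %u\n", nhooks + 1);
            printf("size        = %u\n",
                   nhooks * SIZEOF_STANDARD + SIZEOF_ERROR);
            for (i = 0; i < nhooks; i++)
                    printf("hook_entry[%s] = underflow[%s] = %u\n",
                           hooks[i], hooks[i], i * SIZEOF_STANDARD);
            return 0;
    }

For the three ARP hooks this reproduces exactly the num_entries = 4 and size = 3 * sizeof(struct arpt_standard) + sizeof(struct arpt_error) visible in the initializer deleted above.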
| @@ -158,33 +160,17 @@ ip_checkentry(const struct ipt_ip *ip) | |||
| 158 | } | 160 | } |
| 159 | 161 | ||
| 160 | static unsigned int | 162 | static unsigned int |
| 161 | ipt_error(struct sk_buff *skb, const struct xt_target_param *par) | 163 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) |
| 162 | { | 164 | { |
| 163 | if (net_ratelimit()) | 165 | if (net_ratelimit()) |
| 164 | printk("ip_tables: error: `%s'\n", | 166 | pr_info("error: `%s'\n", (const char *)par->targinfo); |
| 165 | (const char *)par->targinfo); | ||
| 166 | 167 | ||
| 167 | return NF_DROP; | 168 | return NF_DROP; |
| 168 | } | 169 | } |
| 169 | 170 | ||
| 170 | /* Performance critical - called for every packet */ | ||
| 171 | static inline bool | ||
| 172 | do_match(struct ipt_entry_match *m, const struct sk_buff *skb, | ||
| 173 | struct xt_match_param *par) | ||
| 174 | { | ||
| 175 | par->match = m->u.kernel.match; | ||
| 176 | par->matchinfo = m->data; | ||
| 177 | |||
| 178 | /* Stop iteration if it doesn't match */ | ||
| 179 | if (!m->u.kernel.match->match(skb, par)) | ||
| 180 | return true; | ||
| 181 | else | ||
| 182 | return false; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* Performance critical */ | 171 | /* Performance critical */ |
| 186 | static inline struct ipt_entry * | 172 | static inline struct ipt_entry * |
| 187 | get_entry(void *base, unsigned int offset) | 173 | get_entry(const void *base, unsigned int offset) |
| 188 | { | 174 | { |
| 189 | return (struct ipt_entry *)(base + offset); | 175 | return (struct ipt_entry *)(base + offset); |
| 190 | } | 176 | } |
| @@ -199,6 +185,13 @@ static inline bool unconditional(const struct ipt_ip *ip) | |||
| 199 | #undef FWINV | 185 | #undef FWINV |
| 200 | } | 186 | } |
| 201 | 187 | ||
| 188 | /* for const-correctness */ | ||
| 189 | static inline const struct ipt_entry_target * | ||
| 190 | ipt_get_target_c(const struct ipt_entry *e) | ||
| 191 | { | ||
| 192 | return ipt_get_target((struct ipt_entry *)e); | ||
| 193 | } | ||
| 194 | |||
| 202 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ | 195 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ |
| 203 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) | 196 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) |
| 204 | static const char *const hooknames[] = { | 197 | static const char *const hooknames[] = { |
| @@ -233,11 +226,11 @@ static struct nf_loginfo trace_loginfo = { | |||
| 233 | 226 | ||
| 234 | /* Mildly perf critical (only if packet tracing is on) */ | 227 | /* Mildly perf critical (only if packet tracing is on) */ |
| 235 | static inline int | 228 | static inline int |
| 236 | get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, | 229 | get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, |
| 237 | const char *hookname, const char **chainname, | 230 | const char *hookname, const char **chainname, |
| 238 | const char **comment, unsigned int *rulenum) | 231 | const char **comment, unsigned int *rulenum) |
| 239 | { | 232 | { |
| 240 | struct ipt_standard_target *t = (void *)ipt_get_target(s); | 233 | const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); |
| 241 | 234 | ||
| 242 | if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { | 235 | if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { |
| 243 | /* Head of user chain: ERROR target with chainname */ | 236 | /* Head of user chain: ERROR target with chainname */ |
| @@ -263,17 +256,18 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, | |||
| 263 | return 0; | 256 | return 0; |
| 264 | } | 257 | } |
| 265 | 258 | ||
| 266 | static void trace_packet(struct sk_buff *skb, | 259 | static void trace_packet(const struct sk_buff *skb, |
| 267 | unsigned int hook, | 260 | unsigned int hook, |
| 268 | const struct net_device *in, | 261 | const struct net_device *in, |
| 269 | const struct net_device *out, | 262 | const struct net_device *out, |
| 270 | const char *tablename, | 263 | const char *tablename, |
| 271 | struct xt_table_info *private, | 264 | const struct xt_table_info *private, |
| 272 | struct ipt_entry *e) | 265 | const struct ipt_entry *e) |
| 273 | { | 266 | { |
| 274 | void *table_base; | 267 | const void *table_base; |
| 275 | const struct ipt_entry *root; | 268 | const struct ipt_entry *root; |
| 276 | const char *hookname, *chainname, *comment; | 269 | const char *hookname, *chainname, *comment; |
| 270 | const struct ipt_entry *iter; | ||
| 277 | unsigned int rulenum = 0; | 271 | unsigned int rulenum = 0; |
| 278 | 272 | ||
| 279 | table_base = private->entries[smp_processor_id()]; | 273 | table_base = private->entries[smp_processor_id()]; |
| @@ -282,10 +276,10 @@ static void trace_packet(struct sk_buff *skb, | |||
| 282 | hookname = chainname = hooknames[hook]; | 276 | hookname = chainname = hooknames[hook]; |
| 283 | comment = comments[NF_IP_TRACE_COMMENT_RULE]; | 277 | comment = comments[NF_IP_TRACE_COMMENT_RULE]; |
| 284 | 278 | ||
| 285 | IPT_ENTRY_ITERATE(root, | 279 | xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) |
| 286 | private->size - private->hook_entry[hook], | 280 | if (get_chainname_rulenum(iter, e, hookname, |
| 287 | get_chainname_rulenum, | 281 | &chainname, &comment, &rulenum) != 0) |
| 288 | e, hookname, &chainname, &comment, &rulenum); | 282 | break; |
| 289 | 283 | ||
| 290 | nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, | 284 | nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, |
| 291 | "TRACE: %s:%s:%s:%u ", | 285 | "TRACE: %s:%s:%s:%u ", |
| @@ -307,19 +301,16 @@ ipt_do_table(struct sk_buff *skb, | |||
| 307 | const struct net_device *out, | 301 | const struct net_device *out, |
| 308 | struct xt_table *table) | 302 | struct xt_table *table) |
| 309 | { | 303 | { |
| 310 | #define tb_comefrom ((struct ipt_entry *)table_base)->comefrom | ||
| 311 | |||
| 312 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 304 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
| 313 | const struct iphdr *ip; | 305 | const struct iphdr *ip; |
| 314 | bool hotdrop = false; | ||
| 315 | /* Initializing verdict to NF_DROP keeps gcc happy. */ | 306 | /* Initializing verdict to NF_DROP keeps gcc happy. */ |
| 316 | unsigned int verdict = NF_DROP; | 307 | unsigned int verdict = NF_DROP; |
| 317 | const char *indev, *outdev; | 308 | const char *indev, *outdev; |
| 318 | void *table_base; | 309 | const void *table_base; |
| 319 | struct ipt_entry *e, *back; | 310 | struct ipt_entry *e, **jumpstack; |
| 320 | struct xt_table_info *private; | 311 | unsigned int *stackptr, origptr, cpu; |
| 321 | struct xt_match_param mtpar; | 312 | const struct xt_table_info *private; |
| 322 | struct xt_target_param tgpar; | 313 | struct xt_action_param acpar; |
| 323 | 314 | ||
| 324 | /* Initialization */ | 315 | /* Initialization */ |
| 325 | ip = ip_hdr(skb); | 316 | ip = ip_hdr(skb); |
| @@ -331,37 +322,49 @@ ipt_do_table(struct sk_buff *skb, | |||
| 331 | * things we don't know, ie. tcp syn flag or ports). If the | 322 | * things we don't know, ie. tcp syn flag or ports). If the |
| 332 | * rule is also a fragment-specific rule, non-fragments won't | 323 | * rule is also a fragment-specific rule, non-fragments won't |
| 333 | * match it. */ | 324 | * match it. */ |
| 334 | mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; | 325 | acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; |
| 335 | mtpar.thoff = ip_hdrlen(skb); | 326 | acpar.thoff = ip_hdrlen(skb); |
| 336 | mtpar.hotdrop = &hotdrop; | 327 | acpar.hotdrop = false; |
| 337 | mtpar.in = tgpar.in = in; | 328 | acpar.in = in; |
| 338 | mtpar.out = tgpar.out = out; | 329 | acpar.out = out; |
| 339 | mtpar.family = tgpar.family = NFPROTO_IPV4; | 330 | acpar.family = NFPROTO_IPV4; |
| 340 | mtpar.hooknum = tgpar.hooknum = hook; | 331 | acpar.hooknum = hook; |
| 341 | 332 | ||
| 342 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 333 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
| 343 | xt_info_rdlock_bh(); | 334 | xt_info_rdlock_bh(); |
| 344 | private = table->private; | 335 | private = table->private; |
| 345 | table_base = private->entries[smp_processor_id()]; | 336 | cpu = smp_processor_id(); |
| 337 | table_base = private->entries[cpu]; | ||
| 338 | jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; | ||
| 339 | stackptr = per_cpu_ptr(private->stackptr, cpu); | ||
| 340 | origptr = *stackptr; | ||
| 346 | 341 | ||
| 347 | e = get_entry(table_base, private->hook_entry[hook]); | 342 | e = get_entry(table_base, private->hook_entry[hook]); |
| 348 | 343 | ||
| 349 | /* For return from builtin chain */ | 344 | pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", |
| 350 | back = get_entry(table_base, private->underflow[hook]); | 345 | table->name, hook, origptr, |
| 346 | get_entry(table_base, private->underflow[hook])); | ||
| 351 | 347 | ||
| 352 | do { | 348 | do { |
| 353 | struct ipt_entry_target *t; | 349 | const struct ipt_entry_target *t; |
| 350 | const struct xt_entry_match *ematch; | ||
| 354 | 351 | ||
| 355 | IP_NF_ASSERT(e); | 352 | IP_NF_ASSERT(e); |
| 356 | IP_NF_ASSERT(back); | ||
| 357 | if (!ip_packet_match(ip, indev, outdev, | 353 | if (!ip_packet_match(ip, indev, outdev, |
| 358 | &e->ip, mtpar.fragoff) || | 354 | &e->ip, acpar.fragoff)) { |
| 359 | IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { | 355 | no_match: |
| 360 | e = ipt_next_entry(e); | 356 | e = ipt_next_entry(e); |
| 361 | continue; | 357 | continue; |
| 362 | } | 358 | } |
| 363 | 359 | ||
| 364 | ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); | 360 | xt_ematch_foreach(ematch, e) { |
| 361 | acpar.match = ematch->u.kernel.match; | ||
| 362 | acpar.matchinfo = ematch->data; | ||
| 363 | if (!acpar.match->match(skb, &acpar)) | ||
| 364 | goto no_match; | ||
| 365 | } | ||
| 366 | |||
| 367 | ADD_COUNTER(e->counters, skb->len, 1); | ||
| 365 | 368 | ||
| 366 | t = ipt_get_target(e); | 369 | t = ipt_get_target(e); |
| 367 | IP_NF_ASSERT(t->u.kernel.target); | 370 | IP_NF_ASSERT(t->u.kernel.target); |
| @@ -384,41 +387,38 @@ ipt_do_table(struct sk_buff *skb, | |||
| 384 | verdict = (unsigned)(-v) - 1; | 387 | verdict = (unsigned)(-v) - 1; |
| 385 | break; | 388 | break; |
| 386 | } | 389 | } |
| 387 | e = back; | 390 | if (*stackptr == 0) { |
| 388 | back = get_entry(table_base, back->comefrom); | 391 | e = get_entry(table_base, |
| 392 | private->underflow[hook]); | ||
| 393 | pr_debug("Underflow (this is normal) " | ||
| 394 | "to %p\n", e); | ||
| 395 | } else { | ||
| 396 | e = jumpstack[--*stackptr]; | ||
| 397 | pr_debug("Pulled %p out from pos %u\n", | ||
| 398 | e, *stackptr); | ||
| 399 | e = ipt_next_entry(e); | ||
| 400 | } | ||
| 389 | continue; | 401 | continue; |
| 390 | } | 402 | } |
| 391 | if (table_base + v != ipt_next_entry(e) && | 403 | if (table_base + v != ipt_next_entry(e) && |
| 392 | !(e->ip.flags & IPT_F_GOTO)) { | 404 | !(e->ip.flags & IPT_F_GOTO)) { |
| 393 | /* Save old back ptr in next entry */ | 405 | if (*stackptr >= private->stacksize) { |
| 394 | struct ipt_entry *next = ipt_next_entry(e); | 406 | verdict = NF_DROP; |
| 395 | next->comefrom = (void *)back - table_base; | 407 | break; |
| 396 | /* set back pointer to next entry */ | 408 | } |
| 397 | back = next; | 409 | jumpstack[(*stackptr)++] = e; |
| 410 | pr_debug("Pushed %p into pos %u\n", | ||
| 411 | e, *stackptr - 1); | ||
| 398 | } | 412 | } |
| 399 | 413 | ||
| 400 | e = get_entry(table_base, v); | 414 | e = get_entry(table_base, v); |
| 401 | continue; | 415 | continue; |
| 402 | } | 416 | } |
| 403 | 417 | ||
| 404 | /* Targets which reenter must return | 418 | acpar.target = t->u.kernel.target; |
| 405 | abs. verdicts */ | 419 | acpar.targinfo = t->data; |
| 406 | tgpar.target = t->u.kernel.target; | ||
| 407 | tgpar.targinfo = t->data; | ||
| 408 | |||
| 409 | 420 | ||
| 410 | #ifdef CONFIG_NETFILTER_DEBUG | 421 | verdict = t->u.kernel.target->target(skb, &acpar); |
| 411 | tb_comefrom = 0xeeeeeeec; | ||
| 412 | #endif | ||
| 413 | verdict = t->u.kernel.target->target(skb, &tgpar); | ||
| 414 | #ifdef CONFIG_NETFILTER_DEBUG | ||
| 415 | if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { | ||
| 416 | printk("Target %s reentered!\n", | ||
| 417 | t->u.kernel.target->name); | ||
| 418 | verdict = NF_DROP; | ||
| 419 | } | ||
| 420 | tb_comefrom = 0x57acc001; | ||
| 421 | #endif | ||
| 422 | /* Target might have changed stuff. */ | 422 | /* Target might have changed stuff. */ |
| 423 | ip = ip_hdr(skb); | 423 | ip = ip_hdr(skb); |
| 424 | if (verdict == IPT_CONTINUE) | 424 | if (verdict == IPT_CONTINUE) |
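This is the heart of the ipt_do_table() rewrite. The old scheme threaded return addresses through the ruleset itself: each jump saved a back pointer in the next entry's comefrom field, which is also why reentering targets had to be policed with the 0xeeeeeeec/0x57acc001 markers under CONFIG_NETFILTER_DEBUG. The new scheme keeps an explicit per-cpu stack: push the current entry on a jump into a user-defined chain, pop on RETURN, treat an empty stack as fall-through to the builtin chain's underflow, and drop the packet if the stack would exceed private->stacksize. A toy model with rules reduced to plain integers:

    #include <stdio.h>

    #define RET -1 /* stand-in for IPT_RETURN */
    #define ACC -2 /* stand-in for NF_ACCEPT  */

    int main(void)
    {
            /* rule 0 jumps to the chain at index 2; that chain
             * returns; rule 1 accepts */
            int verdicts[] = { 2, ACC, RET };
            int jumpstack[8], sp = 0, cur = 0;

            for (;;) {
                    int v = verdicts[cur];

                    if (v == ACC) {
                            printf("accepted at rule %d\n", cur);
                            break;
                    }
                    if (v == RET) {
                            if (sp == 0) { /* underflow: builtin policy */
                                    printf("underflow -> chain policy\n");
                                    break;
                            }
                            /* pop, then step past the call site, like
                             * the ipt_next_entry() after the pop above */
                            cur = jumpstack[--sp] + 1;
                            continue;
                    }
                    if (sp == 8) { /* kernel: verdict = NF_DROP */
                            printf("stack overflow -> drop\n");
                            break;
                    }
                    jumpstack[sp++] = cur; /* remember the call site */
                    cur = v;               /* enter the chain */
            }
            return 0;
    }

Because the stack lives in per-cpu scratch space instead of in the entries, the table data stays read-only on the packet path, which is what lets table_base become const void * in the hunk above.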
| @@ -426,24 +426,24 @@ ipt_do_table(struct sk_buff *skb, | |||
| 426 | else | 426 | else |
| 427 | /* Verdict */ | 427 | /* Verdict */ |
| 428 | break; | 428 | break; |
| 429 | } while (!hotdrop); | 429 | } while (!acpar.hotdrop); |
| 430 | xt_info_rdunlock_bh(); | 430 | xt_info_rdunlock_bh(); |
| 431 | 431 | pr_debug("Exiting %s; resetting sp from %u to %u\n", | |
| 432 | __func__, *stackptr, origptr); | ||
| 433 | *stackptr = origptr; | ||
| 432 | #ifdef DEBUG_ALLOW_ALL | 434 | #ifdef DEBUG_ALLOW_ALL |
| 433 | return NF_ACCEPT; | 435 | return NF_ACCEPT; |
| 434 | #else | 436 | #else |
| 435 | if (hotdrop) | 437 | if (acpar.hotdrop) |
| 436 | return NF_DROP; | 438 | return NF_DROP; |
| 437 | else return verdict; | 439 | else return verdict; |
| 438 | #endif | 440 | #endif |
| 439 | |||
| 440 | #undef tb_comefrom | ||
| 441 | } | 441 | } |
| 442 | 442 | ||
| 443 | /* Figures out from what hook each rule can be called: returns 0 if | 443 | /* Figures out from what hook each rule can be called: returns 0 if |
| 444 | there are loops. Puts hook bitmask in comefrom. */ | 444 | there are loops. Puts hook bitmask in comefrom. */ |
| 445 | static int | 445 | static int |
| 446 | mark_source_chains(struct xt_table_info *newinfo, | 446 | mark_source_chains(const struct xt_table_info *newinfo, |
| 447 | unsigned int valid_hooks, void *entry0) | 447 | unsigned int valid_hooks, void *entry0) |
| 448 | { | 448 | { |
| 449 | unsigned int hook; | 449 | unsigned int hook; |
| @@ -461,12 +461,12 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
| 461 | e->counters.pcnt = pos; | 461 | e->counters.pcnt = pos; |
| 462 | 462 | ||
| 463 | for (;;) { | 463 | for (;;) { |
| 464 | struct ipt_standard_target *t | 464 | const struct ipt_standard_target *t |
| 465 | = (void *)ipt_get_target(e); | 465 | = (void *)ipt_get_target_c(e); |
| 466 | int visited = e->comefrom & (1 << hook); | 466 | int visited = e->comefrom & (1 << hook); |
| 467 | 467 | ||
| 468 | if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { | 468 | if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { |
| 469 | printk("iptables: loop hook %u pos %u %08X.\n", | 469 | pr_err("iptables: loop hook %u pos %u %08X.\n", |
| 470 | hook, pos, e->comefrom); | 470 | hook, pos, e->comefrom); |
| 471 | return 0; | 471 | return 0; |
| 472 | } | 472 | } |
| @@ -552,30 +552,26 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
| 552 | return 1; | 552 | return 1; |
| 553 | } | 553 | } |
| 554 | 554 | ||
| 555 | static int | 555 | static void cleanup_match(struct ipt_entry_match *m, struct net *net) |
| 556 | cleanup_match(struct ipt_entry_match *m, unsigned int *i) | ||
| 557 | { | 556 | { |
| 558 | struct xt_mtdtor_param par; | 557 | struct xt_mtdtor_param par; |
| 559 | 558 | ||
| 560 | if (i && (*i)-- == 0) | 559 | par.net = net; |
| 561 | return 1; | ||
| 562 | |||
| 563 | par.match = m->u.kernel.match; | 560 | par.match = m->u.kernel.match; |
| 564 | par.matchinfo = m->data; | 561 | par.matchinfo = m->data; |
| 565 | par.family = NFPROTO_IPV4; | 562 | par.family = NFPROTO_IPV4; |
| 566 | if (par.match->destroy != NULL) | 563 | if (par.match->destroy != NULL) |
| 567 | par.match->destroy(&par); | 564 | par.match->destroy(&par); |
| 568 | module_put(par.match->me); | 565 | module_put(par.match->me); |
| 569 | return 0; | ||
| 570 | } | 566 | } |
| 571 | 567 | ||
| 572 | static int | 568 | static int |
| 573 | check_entry(struct ipt_entry *e, const char *name) | 569 | check_entry(const struct ipt_entry *e, const char *name) |
| 574 | { | 570 | { |
| 575 | struct ipt_entry_target *t; | 571 | const struct ipt_entry_target *t; |
| 576 | 572 | ||
| 577 | if (!ip_checkentry(&e->ip)) { | 573 | if (!ip_checkentry(&e->ip)) { |
| 578 | duprintf("ip_tables: ip check failed %p %s.\n", e, name); | 574 | duprintf("ip check failed %p %s.\n", e, name); |
| 579 | return -EINVAL; | 575 | return -EINVAL; |
| 580 | } | 576 | } |
| 581 | 577 | ||
| @@ -583,7 +579,7 @@ check_entry(struct ipt_entry *e, const char *name) | |||
| 583 | e->next_offset) | 579 | e->next_offset) |
| 584 | return -EINVAL; | 580 | return -EINVAL; |
| 585 | 581 | ||
| 586 | t = ipt_get_target(e); | 582 | t = ipt_get_target_c(e); |
| 587 | if (e->target_offset + t->u.target_size > e->next_offset) | 583 | if (e->target_offset + t->u.target_size > e->next_offset) |
| 588 | return -EINVAL; | 584 | return -EINVAL; |
| 589 | 585 | ||
| @@ -591,8 +587,7 @@ check_entry(struct ipt_entry *e, const char *name) | |||
| 591 | } | 587 | } |
| 592 | 588 | ||
| 593 | static int | 589 | static int |
| 594 | check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | 590 | check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) |
| 595 | unsigned int *i) | ||
| 596 | { | 591 | { |
| 597 | const struct ipt_ip *ip = par->entryinfo; | 592 | const struct ipt_ip *ip = par->entryinfo; |
| 598 | int ret; | 593 | int ret; |
| @@ -603,31 +598,27 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | |||
| 603 | ret = xt_check_match(par, m->u.match_size - sizeof(*m), | 598 | ret = xt_check_match(par, m->u.match_size - sizeof(*m), |
| 604 | ip->proto, ip->invflags & IPT_INV_PROTO); | 599 | ip->proto, ip->invflags & IPT_INV_PROTO); |
| 605 | if (ret < 0) { | 600 | if (ret < 0) { |
| 606 | duprintf("ip_tables: check failed for `%s'.\n", | 601 | duprintf("check failed for `%s'.\n", par->match->name); |
| 607 | par.match->name); | ||
| 608 | return ret; | 602 | return ret; |
| 609 | } | 603 | } |
| 610 | ++*i; | ||
| 611 | return 0; | 604 | return 0; |
| 612 | } | 605 | } |
| 613 | 606 | ||
| 614 | static int | 607 | static int |
| 615 | find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, | 608 | find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) |
| 616 | unsigned int *i) | ||
| 617 | { | 609 | { |
| 618 | struct xt_match *match; | 610 | struct xt_match *match; |
| 619 | int ret; | 611 | int ret; |
| 620 | 612 | ||
| 621 | match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, | 613 | match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, |
| 622 | m->u.user.revision), | 614 | m->u.user.revision); |
| 623 | "ipt_%s", m->u.user.name); | 615 | if (IS_ERR(match)) { |
| 624 | if (IS_ERR(match) || !match) { | ||
| 625 | duprintf("find_check_match: `%s' not found\n", m->u.user.name); | 616 | duprintf("find_check_match: `%s' not found\n", m->u.user.name); |
| 626 | return match ? PTR_ERR(match) : -ENOENT; | 617 | return PTR_ERR(match); |
| 627 | } | 618 | } |
| 628 | m->u.kernel.match = match; | 619 | m->u.kernel.match = match; |
| 629 | 620 | ||
| 630 | ret = check_match(m, par, i); | 621 | ret = check_match(m, par); |
| 631 | if (ret) | 622 | if (ret) |
| 632 | goto err; | 623 | goto err; |
| 633 | 624 | ||
| @@ -637,10 +628,11 @@ err: | |||
| 637 | return ret; | 628 | return ret; |
| 638 | } | 629 | } |
| 639 | 630 | ||
| 640 | static int check_target(struct ipt_entry *e, const char *name) | 631 | static int check_target(struct ipt_entry *e, struct net *net, const char *name) |
| 641 | { | 632 | { |
| 642 | struct ipt_entry_target *t = ipt_get_target(e); | 633 | struct ipt_entry_target *t = ipt_get_target(e); |
| 643 | struct xt_tgchk_param par = { | 634 | struct xt_tgchk_param par = { |
| 635 | .net = net, | ||
| 644 | .table = name, | 636 | .table = name, |
| 645 | .entryinfo = e, | 637 | .entryinfo = e, |
| 646 | .target = t->u.kernel.target, | 638 | .target = t->u.kernel.target, |
| @@ -653,7 +645,7 @@ static int check_target(struct ipt_entry *e, const char *name) | |||
| 653 | ret = xt_check_target(&par, t->u.target_size - sizeof(*t), | 645 | ret = xt_check_target(&par, t->u.target_size - sizeof(*t), |
| 654 | e->ip.proto, e->ip.invflags & IPT_INV_PROTO); | 646 | e->ip.proto, e->ip.invflags & IPT_INV_PROTO); |
| 655 | if (ret < 0) { | 647 | if (ret < 0) { |
| 656 | duprintf("ip_tables: check failed for `%s'.\n", | 648 | duprintf("check failed for `%s'.\n", |
| 657 | t->u.kernel.target->name); | 649 | t->u.kernel.target->name); |
| 658 | return ret; | 650 | return ret; |
| 659 | } | 651 | } |
| @@ -661,61 +653,66 @@ static int check_target(struct ipt_entry *e, const char *name) | |||
| 661 | } | 653 | } |
| 662 | 654 | ||
| 663 | static int | 655 | static int |
| 664 | find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, | 656 | find_check_entry(struct ipt_entry *e, struct net *net, const char *name, |
| 665 | unsigned int *i) | 657 | unsigned int size) |
| 666 | { | 658 | { |
| 667 | struct ipt_entry_target *t; | 659 | struct ipt_entry_target *t; |
| 668 | struct xt_target *target; | 660 | struct xt_target *target; |
| 669 | int ret; | 661 | int ret; |
| 670 | unsigned int j; | 662 | unsigned int j; |
| 671 | struct xt_mtchk_param mtpar; | 663 | struct xt_mtchk_param mtpar; |
| 664 | struct xt_entry_match *ematch; | ||
| 672 | 665 | ||
| 673 | ret = check_entry(e, name); | 666 | ret = check_entry(e, name); |
| 674 | if (ret) | 667 | if (ret) |
| 675 | return ret; | 668 | return ret; |
| 676 | 669 | ||
| 677 | j = 0; | 670 | j = 0; |
| 671 | mtpar.net = net; | ||
| 678 | mtpar.table = name; | 672 | mtpar.table = name; |
| 679 | mtpar.entryinfo = &e->ip; | 673 | mtpar.entryinfo = &e->ip; |
| 680 | mtpar.hook_mask = e->comefrom; | 674 | mtpar.hook_mask = e->comefrom; |
| 681 | mtpar.family = NFPROTO_IPV4; | 675 | mtpar.family = NFPROTO_IPV4; |
| 682 | ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); | 676 | xt_ematch_foreach(ematch, e) { |
| 683 | if (ret != 0) | 677 | ret = find_check_match(ematch, &mtpar); |
| 684 | goto cleanup_matches; | 678 | if (ret != 0) |
| 679 | goto cleanup_matches; | ||
| 680 | ++j; | ||
| 681 | } | ||
| 685 | 682 | ||
| 686 | t = ipt_get_target(e); | 683 | t = ipt_get_target(e); |
| 687 | target = try_then_request_module(xt_find_target(AF_INET, | 684 | target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, |
| 688 | t->u.user.name, | 685 | t->u.user.revision); |
| 689 | t->u.user.revision), | 686 | if (IS_ERR(target)) { |
| 690 | "ipt_%s", t->u.user.name); | ||
| 691 | if (IS_ERR(target) || !target) { | ||
| 692 | duprintf("find_check_entry: `%s' not found\n", t->u.user.name); | 687 | duprintf("find_check_entry: `%s' not found\n", t->u.user.name); |
| 693 | ret = target ? PTR_ERR(target) : -ENOENT; | 688 | ret = PTR_ERR(target); |
| 694 | goto cleanup_matches; | 689 | goto cleanup_matches; |
| 695 | } | 690 | } |
| 696 | t->u.kernel.target = target; | 691 | t->u.kernel.target = target; |
| 697 | 692 | ||
| 698 | ret = check_target(e, name); | 693 | ret = check_target(e, net, name); |
| 699 | if (ret) | 694 | if (ret) |
| 700 | goto err; | 695 | goto err; |
| 701 | |||
| 702 | (*i)++; | ||
| 703 | return 0; | 696 | return 0; |
| 704 | err: | 697 | err: |
| 705 | module_put(t->u.kernel.target->me); | 698 | module_put(t->u.kernel.target->me); |
| 706 | cleanup_matches: | 699 | cleanup_matches: |
| 707 | IPT_MATCH_ITERATE(e, cleanup_match, &j); | 700 | xt_ematch_foreach(ematch, e) { |
| 701 | if (j-- == 0) | ||
| 702 | break; | ||
| 703 | cleanup_match(ematch, net); | ||
| 704 | } | ||
| 708 | return ret; | 705 | return ret; |
| 709 | } | 706 | } |
| 710 | 707 | ||
| 711 | static bool check_underflow(struct ipt_entry *e) | 708 | static bool check_underflow(const struct ipt_entry *e) |
| 712 | { | 709 | { |
| 713 | const struct ipt_entry_target *t; | 710 | const struct ipt_entry_target *t; |
| 714 | unsigned int verdict; | 711 | unsigned int verdict; |
| 715 | 712 | ||
| 716 | if (!unconditional(&e->ip)) | 713 | if (!unconditional(&e->ip)) |
| 717 | return false; | 714 | return false; |
| 718 | t = ipt_get_target(e); | 715 | t = ipt_get_target_c(e); |
| 719 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) | 716 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) |
| 720 | return false; | 717 | return false; |
| 721 | verdict = ((struct ipt_standard_target *)t)->verdict; | 718 | verdict = ((struct ipt_standard_target *)t)->verdict; |
| @@ -726,12 +723,11 @@ static bool check_underflow(struct ipt_entry *e) | |||
| 726 | static int | 723 | static int |
| 727 | check_entry_size_and_hooks(struct ipt_entry *e, | 724 | check_entry_size_and_hooks(struct ipt_entry *e, |
| 728 | struct xt_table_info *newinfo, | 725 | struct xt_table_info *newinfo, |
| 729 | unsigned char *base, | 726 | const unsigned char *base, |
| 730 | unsigned char *limit, | 727 | const unsigned char *limit, |
| 731 | const unsigned int *hook_entries, | 728 | const unsigned int *hook_entries, |
| 732 | const unsigned int *underflows, | 729 | const unsigned int *underflows, |
| 733 | unsigned int valid_hooks, | 730 | unsigned int valid_hooks) |
| 734 | unsigned int *i) | ||
| 735 | { | 731 | { |
| 736 | unsigned int h; | 732 | unsigned int h; |
| 737 | 733 | ||
| @@ -768,50 +764,42 @@ check_entry_size_and_hooks(struct ipt_entry *e, | |||
| 768 | /* Clear counters and comefrom */ | 764 | /* Clear counters and comefrom */ |
| 769 | e->counters = ((struct xt_counters) { 0, 0 }); | 765 | e->counters = ((struct xt_counters) { 0, 0 }); |
| 770 | e->comefrom = 0; | 766 | e->comefrom = 0; |
| 771 | |||
| 772 | (*i)++; | ||
| 773 | return 0; | 767 | return 0; |
| 774 | } | 768 | } |
| 775 | 769 | ||
| 776 | static int | 770 | static void |
| 777 | cleanup_entry(struct ipt_entry *e, unsigned int *i) | 771 | cleanup_entry(struct ipt_entry *e, struct net *net) |
| 778 | { | 772 | { |
| 779 | struct xt_tgdtor_param par; | 773 | struct xt_tgdtor_param par; |
| 780 | struct ipt_entry_target *t; | 774 | struct ipt_entry_target *t; |
| 781 | 775 | struct xt_entry_match *ematch; | |
| 782 | if (i && (*i)-- == 0) | ||
| 783 | return 1; | ||
| 784 | 776 | ||
| 785 | /* Cleanup all matches */ | 777 | /* Cleanup all matches */ |
| 786 | IPT_MATCH_ITERATE(e, cleanup_match, NULL); | 778 | xt_ematch_foreach(ematch, e) |
| 779 | cleanup_match(ematch, net); | ||
| 787 | t = ipt_get_target(e); | 780 | t = ipt_get_target(e); |
| 788 | 781 | ||
| 782 | par.net = net; | ||
| 789 | par.target = t->u.kernel.target; | 783 | par.target = t->u.kernel.target; |
| 790 | par.targinfo = t->data; | 784 | par.targinfo = t->data; |
| 791 | par.family = NFPROTO_IPV4; | 785 | par.family = NFPROTO_IPV4; |
| 792 | if (par.target->destroy != NULL) | 786 | if (par.target->destroy != NULL) |
| 793 | par.target->destroy(&par); | 787 | par.target->destroy(&par); |
| 794 | module_put(par.target->me); | 788 | module_put(par.target->me); |
| 795 | return 0; | ||
| 796 | } | 789 | } |
| 797 | 790 | ||
| 798 | /* Checks and translates the user-supplied table segment (held in | 791 | /* Checks and translates the user-supplied table segment (held in |
| 799 | newinfo) */ | 792 | newinfo) */ |
| 800 | static int | 793 | static int |
| 801 | translate_table(const char *name, | 794 | translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, |
| 802 | unsigned int valid_hooks, | 795 | const struct ipt_replace *repl) |
| 803 | struct xt_table_info *newinfo, | ||
| 804 | void *entry0, | ||
| 805 | unsigned int size, | ||
| 806 | unsigned int number, | ||
| 807 | const unsigned int *hook_entries, | ||
| 808 | const unsigned int *underflows) | ||
| 809 | { | 796 | { |
| 797 | struct ipt_entry *iter; | ||
| 810 | unsigned int i; | 798 | unsigned int i; |
| 811 | int ret; | 799 | int ret = 0; |
| 812 | 800 | ||
| 813 | newinfo->size = size; | 801 | newinfo->size = repl->size; |
| 814 | newinfo->number = number; | 802 | newinfo->number = repl->num_entries; |
| 815 | 803 | ||
| 816 | /* Init all hooks to impossible value. */ | 804 | /* Init all hooks to impossible value. */ |
| 817 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { | 805 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { |
| @@ -822,49 +810,61 @@ translate_table(const char *name, | |||
| 822 | duprintf("translate_table: size %u\n", newinfo->size); | 810 | duprintf("translate_table: size %u\n", newinfo->size); |
| 823 | i = 0; | 811 | i = 0; |
| 824 | /* Walk through entries, checking offsets. */ | 812 | /* Walk through entries, checking offsets. */ |
| 825 | ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, | 813 | xt_entry_foreach(iter, entry0, newinfo->size) { |
| 826 | check_entry_size_and_hooks, | 814 | ret = check_entry_size_and_hooks(iter, newinfo, entry0, |
| 827 | newinfo, | 815 | entry0 + repl->size, |
| 828 | entry0, | 816 | repl->hook_entry, |
| 829 | entry0 + size, | 817 | repl->underflow, |
| 830 | hook_entries, underflows, valid_hooks, &i); | 818 | repl->valid_hooks); |
| 831 | if (ret != 0) | 819 | if (ret != 0) |
| 832 | return ret; | 820 | return ret; |
| 821 | ++i; | ||
| 822 | if (strcmp(ipt_get_target(iter)->u.user.name, | ||
| 823 | XT_ERROR_TARGET) == 0) | ||
| 824 | ++newinfo->stacksize; | ||
| 825 | } | ||
| 833 | 826 | ||
| 834 | if (i != number) { | 827 | if (i != repl->num_entries) { |
| 835 | duprintf("translate_table: %u not %u entries\n", | 828 | duprintf("translate_table: %u not %u entries\n", |
| 836 | i, number); | 829 | i, repl->num_entries); |
| 837 | return -EINVAL; | 830 | return -EINVAL; |
| 838 | } | 831 | } |
| 839 | 832 | ||
| 840 | /* Check hooks all assigned */ | 833 | /* Check hooks all assigned */ |
| 841 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { | 834 | for (i = 0; i < NF_INET_NUMHOOKS; i++) { |
| 842 | /* Only hooks which are valid */ | 835 | /* Only hooks which are valid */ |
| 843 | if (!(valid_hooks & (1 << i))) | 836 | if (!(repl->valid_hooks & (1 << i))) |
| 844 | continue; | 837 | continue; |
| 845 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { | 838 | if (newinfo->hook_entry[i] == 0xFFFFFFFF) { |
| 846 | duprintf("Invalid hook entry %u %u\n", | 839 | duprintf("Invalid hook entry %u %u\n", |
| 847 | i, hook_entries[i]); | 840 | i, repl->hook_entry[i]); |
| 848 | return -EINVAL; | 841 | return -EINVAL; |
| 849 | } | 842 | } |
| 850 | if (newinfo->underflow[i] == 0xFFFFFFFF) { | 843 | if (newinfo->underflow[i] == 0xFFFFFFFF) { |
| 851 | duprintf("Invalid underflow %u %u\n", | 844 | duprintf("Invalid underflow %u %u\n", |
| 852 | i, underflows[i]); | 845 | i, repl->underflow[i]); |
| 853 | return -EINVAL; | 846 | return -EINVAL; |
| 854 | } | 847 | } |
| 855 | } | 848 | } |
| 856 | 849 | ||
| 857 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) | 850 | if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) |
| 858 | return -ELOOP; | 851 | return -ELOOP; |
| 859 | 852 | ||
| 860 | /* Finally, each sanity check must pass */ | 853 | /* Finally, each sanity check must pass */ |
| 861 | i = 0; | 854 | i = 0; |
| 862 | ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, | 855 | xt_entry_foreach(iter, entry0, newinfo->size) { |
| 863 | find_check_entry, name, size, &i); | 856 | ret = find_check_entry(iter, net, repl->name, repl->size); |
| 857 | if (ret != 0) | ||
| 858 | break; | ||
| 859 | ++i; | ||
| 860 | } | ||
| 864 | 861 | ||
| 865 | if (ret != 0) { | 862 | if (ret != 0) { |
| 866 | IPT_ENTRY_ITERATE(entry0, newinfo->size, | 863 | xt_entry_foreach(iter, entry0, newinfo->size) { |
| 867 | cleanup_entry, &i); | 864 | if (i-- == 0) |
| 865 | break; | ||
| 866 | cleanup_entry(iter, net); | ||
| 867 | } | ||
| 868 | return ret; | 868 | return ret; |
| 869 | } | 869 | } |
| 870 | 870 | ||
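translate_table() preserves the old failure semantics under the new loop style with a counting idiom: i counts entries whose find_check_entry() succeeded, and the error path re-walks the ruleset from the start, unwinding until the counter runs out, so entries that were never set up are never torn down. find_check_entry() itself uses the same j-based unwind for its matches. A standalone demo of the idiom:

    #include <stdio.h>

    /* simulate per-item setup that fails on the 4th item */
    static int setup(unsigned int idx)
    {
            if (idx == 3)
                    return -1;
            printf("setup   %u\n", idx);
            return 0;
    }

    int main(void)
    {
            unsigned int i = 0, k, n = 5;
            int ret = 0;

            for (k = 0; k < n; k++) {
                    ret = setup(k);
                    if (ret != 0)
                            break;
                    ++i; /* count only fully set-up items */
            }
            if (ret != 0) {
                    for (k = 0; k < n; k++) {
                            if (i-- == 0) /* unwound i items: stop */
                                    break;
                            printf("cleanup %u\n", k);
                    }
            }
            return ret ? 1 : 0;
    }

The same walk also sizes the new jumpstack: every XT_ERROR_TARGET entry marks the head of a user-defined chain, so counting them in stacksize bounds how deep the packet traversal in ipt_do_table() can ever nest.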
| @@ -877,36 +877,14 @@ translate_table(const char *name, | |||
| 877 | return ret; | 877 | return ret; |
| 878 | } | 878 | } |
| 879 | 879 | ||
| 880 | /* Gets counters. */ | ||
| 881 | static inline int | ||
| 882 | add_entry_to_counter(const struct ipt_entry *e, | ||
| 883 | struct xt_counters total[], | ||
| 884 | unsigned int *i) | ||
| 885 | { | ||
| 886 | ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
| 887 | |||
| 888 | (*i)++; | ||
| 889 | return 0; | ||
| 890 | } | ||
| 891 | |||
| 892 | static inline int | ||
| 893 | set_entry_to_counter(const struct ipt_entry *e, | ||
| 894 | struct ipt_counters total[], | ||
| 895 | unsigned int *i) | ||
| 896 | { | ||
| 897 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
| 898 | |||
| 899 | (*i)++; | ||
| 900 | return 0; | ||
| 901 | } | ||
| 902 | |||
| 903 | static void | 880 | static void |
| 904 | get_counters(const struct xt_table_info *t, | 881 | get_counters(const struct xt_table_info *t, |
| 905 | struct xt_counters counters[]) | 882 | struct xt_counters counters[]) |
| 906 | { | 883 | { |
| 884 | struct ipt_entry *iter; | ||
| 907 | unsigned int cpu; | 885 | unsigned int cpu; |
| 908 | unsigned int i; | 886 | unsigned int i; |
| 909 | unsigned int curcpu; | 887 | unsigned int curcpu = get_cpu(); |
| 910 | 888 | ||
| 911 | /* Instead of clearing (by a previous call to memset()) | 889 | /* Instead of clearing (by a previous call to memset()) |
| 912 | * the counters and using adds, we set the counters | 890 | * the counters and using adds, we set the counters |
| @@ -916,41 +894,45 @@ get_counters(const struct xt_table_info *t, | |||
| 916 | * if new softirq were to run and call ipt_do_table | 894 | * if new softirq were to run and call ipt_do_table |
| 917 | */ | 895 | */ |
| 918 | local_bh_disable(); | 896 | local_bh_disable(); |
| 919 | curcpu = smp_processor_id(); | ||
| 920 | |||
| 921 | i = 0; | 897 | i = 0; |
| 922 | IPT_ENTRY_ITERATE(t->entries[curcpu], | 898 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { |
| 923 | t->size, | 899 | SET_COUNTER(counters[i], iter->counters.bcnt, |
| 924 | set_entry_to_counter, | 900 | iter->counters.pcnt); |
| 925 | counters, | 901 | ++i; |
| 926 | &i); | 902 | } |
| 903 | local_bh_enable(); | ||
| 904 | /* Processing counters from other cpus, we can leave bottom halves | ||
| 905 | * enabled (preemption is disabled). | ||
| 906 | */ | ||
| 927 | 907 | ||
| 928 | for_each_possible_cpu(cpu) { | 908 | for_each_possible_cpu(cpu) { |
| 929 | if (cpu == curcpu) | 909 | if (cpu == curcpu) |
| 930 | continue; | 910 | continue; |
| 931 | i = 0; | 911 | i = 0; |
| 912 | local_bh_disable(); | ||
| 932 | xt_info_wrlock(cpu); | 913 | xt_info_wrlock(cpu); |
| 933 | IPT_ENTRY_ITERATE(t->entries[cpu], | 914 | xt_entry_foreach(iter, t->entries[cpu], t->size) { |
| 934 | t->size, | 915 | ADD_COUNTER(counters[i], iter->counters.bcnt, |
| 935 | add_entry_to_counter, | 916 | iter->counters.pcnt); |
| 936 | counters, | 917 | ++i; /* macro does multi eval of i */ |
| 937 | &i); | 918 | } |
| 938 | xt_info_wrunlock(cpu); | 919 | xt_info_wrunlock(cpu); |
| 920 | local_bh_enable(); | ||
| 939 | } | 921 | } |
| 940 | local_bh_enable(); | 922 | put_cpu(); |
| 941 | } | 923 | } |
| 942 | 924 | ||
| 943 | static struct xt_counters * alloc_counters(struct xt_table *table) | 925 | static struct xt_counters *alloc_counters(const struct xt_table *table) |
| 944 | { | 926 | { |
| 945 | unsigned int countersize; | 927 | unsigned int countersize; |
| 946 | struct xt_counters *counters; | 928 | struct xt_counters *counters; |
| 947 | struct xt_table_info *private = table->private; | 929 | const struct xt_table_info *private = table->private; |
| 948 | 930 | ||
| 949 | /* We need atomic snapshot of counters: rest doesn't change | 931 | /* We need atomic snapshot of counters: rest doesn't change |
| 950 | (other than comefrom, which userspace doesn't care | 932 | (other than comefrom, which userspace doesn't care |
| 951 | about). */ | 933 | about). */ |
| 952 | countersize = sizeof(struct xt_counters) * private->number; | 934 | countersize = sizeof(struct xt_counters) * private->number; |
| 953 | counters = vmalloc_node(countersize, numa_node_id()); | 935 | counters = vmalloc(countersize); |
| 954 | 936 | ||
| 955 | if (counters == NULL) | 937 | if (counters == NULL) |
| 956 | return ERR_PTR(-ENOMEM); | 938 | return ERR_PTR(-ENOMEM); |
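The reworked get_counters() pins itself to one CPU with get_cpu(), snapshots that CPU's table copy with SET_COUNTER, then folds in every other CPU's copy with ADD_COUNTER under that CPU's xt_info write lock, toggling bottom halves around each per-cpu pass rather than across the whole walk. Stripped of the locking, the arithmetic is just this (a toy model, not the kernel macros):

    #include <stdio.h>

    #define NCPUS  4
    #define NRULES 3

    int main(void)
    {
            unsigned long percpu[NCPUS][NRULES] = {
                    { 1, 2, 3 }, { 10, 20, 30 }, { 0, 0, 5 }, { 4, 4, 4 },
            };
            unsigned long counters[NRULES];
            unsigned int curcpu = 0, cpu, i; /* get_cpu() stand-in */

            for (i = 0; i < NRULES; i++)     /* SET from the local cpu */
                    counters[i] = percpu[curcpu][i];

            for (cpu = 0; cpu < NCPUS; cpu++) { /* ADD from the others */
                    if (cpu == curcpu)
                            continue;
                    for (i = 0; i < NRULES; i++)
                            counters[i] += percpu[cpu][i];
            }

            for (i = 0; i < NRULES; i++)
                    printf("rule %u: %lu\n", i, counters[i]);
            return 0;
    }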
| @@ -962,11 +944,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) | |||
| 962 | 944 | ||
| 963 | static int | 945 | static int |
| 964 | copy_entries_to_user(unsigned int total_size, | 946 | copy_entries_to_user(unsigned int total_size, |
| 965 | struct xt_table *table, | 947 | const struct xt_table *table, |
| 966 | void __user *userptr) | 948 | void __user *userptr) |
| 967 | { | 949 | { |
| 968 | unsigned int off, num; | 950 | unsigned int off, num; |
| 969 | struct ipt_entry *e; | 951 | const struct ipt_entry *e; |
| 970 | struct xt_counters *counters; | 952 | struct xt_counters *counters; |
| 971 | const struct xt_table_info *private = table->private; | 953 | const struct xt_table_info *private = table->private; |
| 972 | int ret = 0; | 954 | int ret = 0; |
| @@ -1018,7 +1000,7 @@ copy_entries_to_user(unsigned int total_size, | |||
| 1018 | } | 1000 | } |
| 1019 | } | 1001 | } |
| 1020 | 1002 | ||
| 1021 | t = ipt_get_target(e); | 1003 | t = ipt_get_target_c(e); |
| 1022 | if (copy_to_user(userptr + off + e->target_offset | 1004 | if (copy_to_user(userptr + off + e->target_offset |
| 1023 | + offsetof(struct ipt_entry_target, | 1005 | + offsetof(struct ipt_entry_target, |
| 1024 | u.user.name), | 1006 | u.user.name), |
| @@ -1035,7 +1017,7 @@ copy_entries_to_user(unsigned int total_size, | |||
| 1035 | } | 1017 | } |
| 1036 | 1018 | ||
| 1037 | #ifdef CONFIG_COMPAT | 1019 | #ifdef CONFIG_COMPAT |
| 1038 | static void compat_standard_from_user(void *dst, void *src) | 1020 | static void compat_standard_from_user(void *dst, const void *src) |
| 1039 | { | 1021 | { |
| 1040 | int v = *(compat_int_t *)src; | 1022 | int v = *(compat_int_t *)src; |
| 1041 | 1023 | ||
| @@ -1044,7 +1026,7 @@ static void compat_standard_from_user(void *dst, void *src) | |||
| 1044 | memcpy(dst, &v, sizeof(v)); | 1026 | memcpy(dst, &v, sizeof(v)); |
| 1045 | } | 1027 | } |
| 1046 | 1028 | ||
| 1047 | static int compat_standard_to_user(void __user *dst, void *src) | 1029 | static int compat_standard_to_user(void __user *dst, const void *src) |
| 1048 | { | 1030 | { |
| 1049 | compat_int_t cv = *(int *)src; | 1031 | compat_int_t cv = *(int *)src; |
| 1050 | 1032 | ||
| @@ -1053,25 +1035,20 @@ static int compat_standard_to_user(void __user *dst, void *src) | |||
| 1053 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; | 1035 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; |
| 1054 | } | 1036 | } |
| 1055 | 1037 | ||
| 1056 | static inline int | 1038 | static int compat_calc_entry(const struct ipt_entry *e, |
| 1057 | compat_calc_match(struct ipt_entry_match *m, int *size) | ||
| 1058 | { | ||
| 1059 | *size += xt_compat_match_offset(m->u.kernel.match); | ||
| 1060 | return 0; | ||
| 1061 | } | ||
| 1062 | |||
| 1063 | static int compat_calc_entry(struct ipt_entry *e, | ||
| 1064 | const struct xt_table_info *info, | 1039 | const struct xt_table_info *info, |
| 1065 | void *base, struct xt_table_info *newinfo) | 1040 | const void *base, struct xt_table_info *newinfo) |
| 1066 | { | 1041 | { |
| 1067 | struct ipt_entry_target *t; | 1042 | const struct xt_entry_match *ematch; |
| 1043 | const struct ipt_entry_target *t; | ||
| 1068 | unsigned int entry_offset; | 1044 | unsigned int entry_offset; |
| 1069 | int off, i, ret; | 1045 | int off, i, ret; |
| 1070 | 1046 | ||
| 1071 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1047 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
| 1072 | entry_offset = (void *)e - base; | 1048 | entry_offset = (void *)e - base; |
| 1073 | IPT_MATCH_ITERATE(e, compat_calc_match, &off); | 1049 | xt_ematch_foreach(ematch, e) |
| 1074 | t = ipt_get_target(e); | 1050 | off += xt_compat_match_offset(ematch->u.kernel.match); |
| 1051 | t = ipt_get_target_c(e); | ||
| 1075 | off += xt_compat_target_offset(t->u.kernel.target); | 1052 | off += xt_compat_target_offset(t->u.kernel.target); |
| 1076 | newinfo->size -= off; | 1053 | newinfo->size -= off; |
| 1077 | ret = xt_compat_add_offset(AF_INET, entry_offset, off); | 1054 | ret = xt_compat_add_offset(AF_INET, entry_offset, off); |
| @@ -1092,7 +1069,9 @@ static int compat_calc_entry(struct ipt_entry *e, | |||
| 1092 | static int compat_table_info(const struct xt_table_info *info, | 1069 | static int compat_table_info(const struct xt_table_info *info, |
| 1093 | struct xt_table_info *newinfo) | 1070 | struct xt_table_info *newinfo) |
| 1094 | { | 1071 | { |
| 1072 | struct ipt_entry *iter; | ||
| 1095 | void *loc_cpu_entry; | 1073 | void *loc_cpu_entry; |
| 1074 | int ret; | ||
| 1096 | 1075 | ||
| 1097 | if (!newinfo || !info) | 1076 | if (!newinfo || !info) |
| 1098 | return -EINVAL; | 1077 | return -EINVAL; |
| @@ -1101,13 +1080,17 @@ static int compat_table_info(const struct xt_table_info *info, | |||
| 1101 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1080 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
| 1102 | newinfo->initial_entries = 0; | 1081 | newinfo->initial_entries = 0; |
| 1103 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1082 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
| 1104 | return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, | 1083 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
| 1105 | compat_calc_entry, info, loc_cpu_entry, | 1084 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
| 1106 | newinfo); | 1085 | if (ret != 0) |
| 1086 | return ret; | ||
| 1087 | } | ||
| 1088 | return 0; | ||
| 1107 | } | 1089 | } |
| 1108 | #endif | 1090 | #endif |
| 1109 | 1091 | ||
| 1110 | static int get_info(struct net *net, void __user *user, int *len, int compat) | 1092 | static int get_info(struct net *net, void __user *user, |
| 1093 | const int *len, int compat) | ||
| 1111 | { | 1094 | { |
| 1112 | char name[IPT_TABLE_MAXNAMELEN]; | 1095 | char name[IPT_TABLE_MAXNAMELEN]; |
| 1113 | struct xt_table *t; | 1096 | struct xt_table *t; |
| @@ -1132,10 +1115,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
| 1132 | if (t && !IS_ERR(t)) { | 1115 | if (t && !IS_ERR(t)) { |
| 1133 | struct ipt_getinfo info; | 1116 | struct ipt_getinfo info; |
| 1134 | const struct xt_table_info *private = t->private; | 1117 | const struct xt_table_info *private = t->private; |
| 1135 | |||
| 1136 | #ifdef CONFIG_COMPAT | 1118 | #ifdef CONFIG_COMPAT |
| 1119 | struct xt_table_info tmp; | ||
| 1120 | |||
| 1137 | if (compat) { | 1121 | if (compat) { |
| 1138 | struct xt_table_info tmp; | ||
| 1139 | ret = compat_table_info(private, &tmp); | 1122 | ret = compat_table_info(private, &tmp); |
| 1140 | xt_compat_flush_offsets(AF_INET); | 1123 | xt_compat_flush_offsets(AF_INET); |
| 1141 | private = &tmp; | 1124 | private = &tmp; |
| @@ -1167,7 +1150,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
| 1167 | } | 1150 | } |
| 1168 | 1151 | ||
| 1169 | static int | 1152 | static int |
| 1170 | get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) | 1153 | get_entries(struct net *net, struct ipt_get_entries __user *uptr, |
| 1154 | const int *len) | ||
| 1171 | { | 1155 | { |
| 1172 | int ret; | 1156 | int ret; |
| 1173 | struct ipt_get_entries get; | 1157 | struct ipt_get_entries get; |
| @@ -1215,6 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1215 | struct xt_table_info *oldinfo; | 1199 | struct xt_table_info *oldinfo; |
| 1216 | struct xt_counters *counters; | 1200 | struct xt_counters *counters; |
| 1217 | void *loc_cpu_old_entry; | 1201 | void *loc_cpu_old_entry; |
| 1202 | struct ipt_entry *iter; | ||
| 1218 | 1203 | ||
| 1219 | ret = 0; | 1204 | ret = 0; |
| 1220 | counters = vmalloc(num_counters * sizeof(struct xt_counters)); | 1205 | counters = vmalloc(num_counters * sizeof(struct xt_counters)); |
| @@ -1257,8 +1242,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1257 | 1242 | ||
| 1258 | /* Decrease module usage counts and free resource */ | 1243 | /* Decrease module usage counts and free resource */ |
| 1259 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1244 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
| 1260 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1245 | xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) |
| 1261 | NULL); | 1246 | cleanup_entry(iter, net); |
| 1247 | |||
| 1262 | xt_free_table_info(oldinfo); | 1248 | xt_free_table_info(oldinfo); |
| 1263 | if (copy_to_user(counters_ptr, counters, | 1249 | if (copy_to_user(counters_ptr, counters, |
| 1264 | sizeof(struct xt_counters) * num_counters) != 0) | 1250 | sizeof(struct xt_counters) * num_counters) != 0) |
| @@ -1277,12 +1263,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1277 | } | 1263 | } |
| 1278 | 1264 | ||
| 1279 | static int | 1265 | static int |
| 1280 | do_replace(struct net *net, void __user *user, unsigned int len) | 1266 | do_replace(struct net *net, const void __user *user, unsigned int len) |
| 1281 | { | 1267 | { |
| 1282 | int ret; | 1268 | int ret; |
| 1283 | struct ipt_replace tmp; | 1269 | struct ipt_replace tmp; |
| 1284 | struct xt_table_info *newinfo; | 1270 | struct xt_table_info *newinfo; |
| 1285 | void *loc_cpu_entry; | 1271 | void *loc_cpu_entry; |
| 1272 | struct ipt_entry *iter; | ||
| 1286 | 1273 | ||
| 1287 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1274 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
| 1288 | return -EFAULT; | 1275 | return -EFAULT; |
| @@ -1303,13 +1290,11 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1303 | goto free_newinfo; | 1290 | goto free_newinfo; |
| 1304 | } | 1291 | } |
| 1305 | 1292 | ||
| 1306 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1293 | ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); |
| 1307 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, | ||
| 1308 | tmp.hook_entry, tmp.underflow); | ||
| 1309 | if (ret != 0) | 1294 | if (ret != 0) |
| 1310 | goto free_newinfo; | 1295 | goto free_newinfo; |
| 1311 | 1296 | ||
| 1312 | duprintf("ip_tables: Translated table\n"); | 1297 | duprintf("Translated table\n"); |
| 1313 | 1298 | ||
| 1314 | ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, | 1299 | ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, |
| 1315 | tmp.num_counters, tmp.counters); | 1300 | tmp.num_counters, tmp.counters); |
| @@ -1318,27 +1303,16 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1318 | return 0; | 1303 | return 0; |
| 1319 | 1304 | ||
| 1320 | free_newinfo_untrans: | 1305 | free_newinfo_untrans: |
| 1321 | IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1306 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
| 1307 | cleanup_entry(iter, net); | ||
| 1322 | free_newinfo: | 1308 | free_newinfo: |
| 1323 | xt_free_table_info(newinfo); | 1309 | xt_free_table_info(newinfo); |
| 1324 | return ret; | 1310 | return ret; |
| 1325 | } | 1311 | } |
| 1326 | 1312 | ||
| 1327 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
| 1328 | * and everything is OK. */ | ||
| 1329 | static int | 1313 | static int |
| 1330 | add_counter_to_entry(struct ipt_entry *e, | 1314 | do_add_counters(struct net *net, const void __user *user, |
| 1331 | const struct xt_counters addme[], | 1315 | unsigned int len, int compat) |
| 1332 | unsigned int *i) | ||
| 1333 | { | ||
| 1334 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
| 1335 | |||
| 1336 | (*i)++; | ||
| 1337 | return 0; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | static int | ||
| 1341 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) | ||
| 1342 | { | 1316 | { |
| 1343 | unsigned int i, curcpu; | 1317 | unsigned int i, curcpu; |
| 1344 | struct xt_counters_info tmp; | 1318 | struct xt_counters_info tmp; |
| @@ -1351,6 +1325,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
| 1351 | const struct xt_table_info *private; | 1325 | const struct xt_table_info *private; |
| 1352 | int ret = 0; | 1326 | int ret = 0; |
| 1353 | void *loc_cpu_entry; | 1327 | void *loc_cpu_entry; |
| 1328 | struct ipt_entry *iter; | ||
| 1354 | #ifdef CONFIG_COMPAT | 1329 | #ifdef CONFIG_COMPAT |
| 1355 | struct compat_xt_counters_info compat_tmp; | 1330 | struct compat_xt_counters_info compat_tmp; |
| 1356 | 1331 | ||
| @@ -1381,7 +1356,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
| 1381 | if (len != size + num_counters * sizeof(struct xt_counters)) | 1356 | if (len != size + num_counters * sizeof(struct xt_counters)) |
| 1382 | return -EINVAL; | 1357 | return -EINVAL; |
| 1383 | 1358 | ||
| 1384 | paddc = vmalloc_node(len - size, numa_node_id()); | 1359 | paddc = vmalloc(len - size); |
| 1385 | if (!paddc) | 1360 | if (!paddc) |
| 1386 | return -ENOMEM; | 1361 | return -ENOMEM; |
| 1387 | 1362 | ||
| @@ -1408,11 +1383,10 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
| 1408 | curcpu = smp_processor_id(); | 1383 | curcpu = smp_processor_id(); |
| 1409 | loc_cpu_entry = private->entries[curcpu]; | 1384 | loc_cpu_entry = private->entries[curcpu]; |
| 1410 | xt_info_wrlock(curcpu); | 1385 | xt_info_wrlock(curcpu); |
| 1411 | IPT_ENTRY_ITERATE(loc_cpu_entry, | 1386 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { |
| 1412 | private->size, | 1387 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); |
| 1413 | add_counter_to_entry, | 1388 | ++i; |
| 1414 | paddc, | 1389 | } |
| 1415 | &i); | ||
| 1416 | xt_info_wrunlock(curcpu); | 1390 | xt_info_wrunlock(curcpu); |
| 1417 | unlock_up_free: | 1391 | unlock_up_free: |
| 1418 | local_bh_enable(); | 1392 | local_bh_enable(); |
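ADD_COUNTER() itself only accumulates byte and packet totals, so the open-coded loop above does per entry exactly what the removed add_counter_to_entry() callback did. Written out longhand (illustrative only):

	/* What the ADD_COUNTER() step amounts to for one entry: */
	static inline void add_counter(struct xt_counters *c, u64 bytes, u64 pkts)
	{
		c->bcnt += bytes;	/* byte count */
		c->pcnt += pkts;	/* packet count */
	}

The xt_info_wrlock() taken just before the loop keeps the per-CPU counters consistent while userspace injects its deltas.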
| @@ -1440,45 +1414,40 @@ struct compat_ipt_replace { | |||
| 1440 | static int | 1414 | static int |
| 1441 | compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, | 1415 | compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, |
| 1442 | unsigned int *size, struct xt_counters *counters, | 1416 | unsigned int *size, struct xt_counters *counters, |
| 1443 | unsigned int *i) | 1417 | unsigned int i) |
| 1444 | { | 1418 | { |
| 1445 | struct ipt_entry_target *t; | 1419 | struct ipt_entry_target *t; |
| 1446 | struct compat_ipt_entry __user *ce; | 1420 | struct compat_ipt_entry __user *ce; |
| 1447 | u_int16_t target_offset, next_offset; | 1421 | u_int16_t target_offset, next_offset; |
| 1448 | compat_uint_t origsize; | 1422 | compat_uint_t origsize; |
| 1449 | int ret; | 1423 | const struct xt_entry_match *ematch; |
| 1424 | int ret = 0; | ||
| 1450 | 1425 | ||
| 1451 | ret = -EFAULT; | ||
| 1452 | origsize = *size; | 1426 | origsize = *size; |
| 1453 | ce = (struct compat_ipt_entry __user *)*dstptr; | 1427 | ce = (struct compat_ipt_entry __user *)*dstptr; |
| 1454 | if (copy_to_user(ce, e, sizeof(struct ipt_entry))) | 1428 | if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || |
| 1455 | goto out; | 1429 | copy_to_user(&ce->counters, &counters[i], |
| 1456 | 1430 | sizeof(counters[i])) != 0) | |
| 1457 | if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) | 1431 | return -EFAULT; |
| 1458 | goto out; | ||
| 1459 | 1432 | ||
| 1460 | *dstptr += sizeof(struct compat_ipt_entry); | 1433 | *dstptr += sizeof(struct compat_ipt_entry); |
| 1461 | *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1434 | *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
| 1462 | 1435 | ||
| 1463 | ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); | 1436 | xt_ematch_foreach(ematch, e) { |
| 1437 | ret = xt_compat_match_to_user(ematch, dstptr, size); | ||
| 1438 | if (ret != 0) | ||
| 1439 | return ret; | ||
| 1440 | } | ||
| 1464 | target_offset = e->target_offset - (origsize - *size); | 1441 | target_offset = e->target_offset - (origsize - *size); |
| 1465 | if (ret) | ||
| 1466 | goto out; | ||
| 1467 | t = ipt_get_target(e); | 1442 | t = ipt_get_target(e); |
| 1468 | ret = xt_compat_target_to_user(t, dstptr, size); | 1443 | ret = xt_compat_target_to_user(t, dstptr, size); |
| 1469 | if (ret) | 1444 | if (ret) |
| 1470 | goto out; | 1445 | return ret; |
| 1471 | ret = -EFAULT; | ||
| 1472 | next_offset = e->next_offset - (origsize - *size); | 1446 | next_offset = e->next_offset - (origsize - *size); |
| 1473 | if (put_user(target_offset, &ce->target_offset)) | 1447 | if (put_user(target_offset, &ce->target_offset) != 0 || |
| 1474 | goto out; | 1448 | put_user(next_offset, &ce->next_offset) != 0) |
| 1475 | if (put_user(next_offset, &ce->next_offset)) | 1449 | return -EFAULT; |
| 1476 | goto out; | ||
| 1477 | |||
| 1478 | (*i)++; | ||
| 1479 | return 0; | 1450 | return 0; |
| 1480 | out: | ||
| 1481 | return ret; | ||
| 1482 | } | 1451 | } |
| 1483 | 1452 | ||
| 1484 | static int | 1453 | static int |
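Beyond the iterator conversion, compat_copy_entry_to_user() above folds its sequence of copy_to_user()/put_user() calls into short-circuit chains, leaving a single -EFAULT exit instead of a ladder of goto labels. The shape of the new code:

	/* One failure path instead of several: */
	if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
	    copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0)
		return -EFAULT;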
| @@ -1486,61 +1455,45 @@ compat_find_calc_match(struct ipt_entry_match *m, | |||
| 1486 | const char *name, | 1455 | const char *name, |
| 1487 | const struct ipt_ip *ip, | 1456 | const struct ipt_ip *ip, |
| 1488 | unsigned int hookmask, | 1457 | unsigned int hookmask, |
| 1489 | int *size, unsigned int *i) | 1458 | int *size) |
| 1490 | { | 1459 | { |
| 1491 | struct xt_match *match; | 1460 | struct xt_match *match; |
| 1492 | 1461 | ||
| 1493 | match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, | 1462 | match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, |
| 1494 | m->u.user.revision), | 1463 | m->u.user.revision); |
| 1495 | "ipt_%s", m->u.user.name); | 1464 | if (IS_ERR(match)) { |
| 1496 | if (IS_ERR(match) || !match) { | ||
| 1497 | duprintf("compat_check_calc_match: `%s' not found\n", | 1465 | duprintf("compat_check_calc_match: `%s' not found\n", |
| 1498 | m->u.user.name); | 1466 | m->u.user.name); |
| 1499 | return match ? PTR_ERR(match) : -ENOENT; | 1467 | return PTR_ERR(match); |
| 1500 | } | 1468 | } |
| 1501 | m->u.kernel.match = match; | 1469 | m->u.kernel.match = match; |
| 1502 | *size += xt_compat_match_offset(match); | 1470 | *size += xt_compat_match_offset(match); |
| 1503 | |||
| 1504 | (*i)++; | ||
| 1505 | return 0; | 1471 | return 0; |
| 1506 | } | 1472 | } |
| 1507 | 1473 | ||
| 1508 | static int | 1474 | static void compat_release_entry(struct compat_ipt_entry *e) |
| 1509 | compat_release_match(struct ipt_entry_match *m, unsigned int *i) | ||
| 1510 | { | ||
| 1511 | if (i && (*i)-- == 0) | ||
| 1512 | return 1; | ||
| 1513 | |||
| 1514 | module_put(m->u.kernel.match->me); | ||
| 1515 | return 0; | ||
| 1516 | } | ||
| 1517 | |||
| 1518 | static int | ||
| 1519 | compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) | ||
| 1520 | { | 1475 | { |
| 1521 | struct ipt_entry_target *t; | 1476 | struct ipt_entry_target *t; |
| 1522 | 1477 | struct xt_entry_match *ematch; | |
| 1523 | if (i && (*i)-- == 0) | ||
| 1524 | return 1; | ||
| 1525 | 1478 | ||
| 1526 | /* Cleanup all matches */ | 1479 | /* Cleanup all matches */ |
| 1527 | COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); | 1480 | xt_ematch_foreach(ematch, e) |
| 1481 | module_put(ematch->u.kernel.match->me); | ||
| 1528 | t = compat_ipt_get_target(e); | 1482 | t = compat_ipt_get_target(e); |
| 1529 | module_put(t->u.kernel.target->me); | 1483 | module_put(t->u.kernel.target->me); |
| 1530 | return 0; | ||
| 1531 | } | 1484 | } |
| 1532 | 1485 | ||
| 1533 | static int | 1486 | static int |
| 1534 | check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | 1487 | check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, |
| 1535 | struct xt_table_info *newinfo, | 1488 | struct xt_table_info *newinfo, |
| 1536 | unsigned int *size, | 1489 | unsigned int *size, |
| 1537 | unsigned char *base, | 1490 | const unsigned char *base, |
| 1538 | unsigned char *limit, | 1491 | const unsigned char *limit, |
| 1539 | unsigned int *hook_entries, | 1492 | const unsigned int *hook_entries, |
| 1540 | unsigned int *underflows, | 1493 | const unsigned int *underflows, |
| 1541 | unsigned int *i, | ||
| 1542 | const char *name) | 1494 | const char *name) |
| 1543 | { | 1495 | { |
| 1496 | struct xt_entry_match *ematch; | ||
| 1544 | struct ipt_entry_target *t; | 1497 | struct ipt_entry_target *t; |
| 1545 | struct xt_target *target; | 1498 | struct xt_target *target; |
| 1546 | unsigned int entry_offset; | 1499 | unsigned int entry_offset; |
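xt_request_find_match() and xt_request_find_target(), seen in the hunk above, replace the try_then_request_module() dance: they request the module themselves and never return NULL, reporting failure as an ERR_PTR-encoded errno instead. Callers shrink to this shape:

	struct xt_match *match;

	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
				      m->u.user.revision);
	if (IS_ERR(match))	/* never NULL; the errno lives in the pointer */
		return PTR_ERR(match);
	m->u.kernel.match = match;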
| @@ -1569,20 +1522,21 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |||
| 1569 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1522 | off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
| 1570 | entry_offset = (void *)e - (void *)base; | 1523 | entry_offset = (void *)e - (void *)base; |
| 1571 | j = 0; | 1524 | j = 0; |
| 1572 | ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name, | 1525 | xt_ematch_foreach(ematch, e) { |
| 1573 | &e->ip, e->comefrom, &off, &j); | 1526 | ret = compat_find_calc_match(ematch, name, |
| 1574 | if (ret != 0) | 1527 | &e->ip, e->comefrom, &off); |
| 1575 | goto release_matches; | 1528 | if (ret != 0) |
| 1529 | goto release_matches; | ||
| 1530 | ++j; | ||
| 1531 | } | ||
| 1576 | 1532 | ||
| 1577 | t = compat_ipt_get_target(e); | 1533 | t = compat_ipt_get_target(e); |
| 1578 | target = try_then_request_module(xt_find_target(AF_INET, | 1534 | target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, |
| 1579 | t->u.user.name, | 1535 | t->u.user.revision); |
| 1580 | t->u.user.revision), | 1536 | if (IS_ERR(target)) { |
| 1581 | "ipt_%s", t->u.user.name); | ||
| 1582 | if (IS_ERR(target) || !target) { | ||
| 1583 | duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", | 1537 | duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", |
| 1584 | t->u.user.name); | 1538 | t->u.user.name); |
| 1585 | ret = target ? PTR_ERR(target) : -ENOENT; | 1539 | ret = PTR_ERR(target); |
| 1586 | goto release_matches; | 1540 | goto release_matches; |
| 1587 | } | 1541 | } |
| 1588 | t->u.kernel.target = target; | 1542 | t->u.kernel.target = target; |
| @@ -1604,14 +1558,16 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |||
| 1604 | /* Clear counters and comefrom */ | 1558 | /* Clear counters and comefrom */ |
| 1605 | memset(&e->counters, 0, sizeof(e->counters)); | 1559 | memset(&e->counters, 0, sizeof(e->counters)); |
| 1606 | e->comefrom = 0; | 1560 | e->comefrom = 0; |
| 1607 | |||
| 1608 | (*i)++; | ||
| 1609 | return 0; | 1561 | return 0; |
| 1610 | 1562 | ||
| 1611 | out: | 1563 | out: |
| 1612 | module_put(t->u.kernel.target->me); | 1564 | module_put(t->u.kernel.target->me); |
| 1613 | release_matches: | 1565 | release_matches: |
| 1614 | IPT_MATCH_ITERATE(e, compat_release_match, &j); | 1566 | xt_ematch_foreach(ematch, e) { |
| 1567 | if (j-- == 0) | ||
| 1568 | break; | ||
| 1569 | module_put(ematch->u.kernel.match->me); | ||
| 1570 | } | ||
| 1615 | return ret; | 1571 | return ret; |
| 1616 | } | 1572 | } |
| 1617 | 1573 | ||
| @@ -1625,6 +1581,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, | |||
| 1625 | struct ipt_entry *de; | 1581 | struct ipt_entry *de; |
| 1626 | unsigned int origsize; | 1582 | unsigned int origsize; |
| 1627 | int ret, h; | 1583 | int ret, h; |
| 1584 | struct xt_entry_match *ematch; | ||
| 1628 | 1585 | ||
| 1629 | ret = 0; | 1586 | ret = 0; |
| 1630 | origsize = *size; | 1587 | origsize = *size; |
| @@ -1635,10 +1592,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, | |||
| 1635 | *dstptr += sizeof(struct ipt_entry); | 1592 | *dstptr += sizeof(struct ipt_entry); |
| 1636 | *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); | 1593 | *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); |
| 1637 | 1594 | ||
| 1638 | ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user, | 1595 | xt_ematch_foreach(ematch, e) { |
| 1639 | dstptr, size); | 1596 | ret = xt_compat_match_from_user(ematch, dstptr, size); |
| 1640 | if (ret) | 1597 | if (ret != 0) |
| 1641 | return ret; | 1598 | return ret; |
| 1599 | } | ||
| 1642 | de->target_offset = e->target_offset - (origsize - *size); | 1600 | de->target_offset = e->target_offset - (origsize - *size); |
| 1643 | t = compat_ipt_get_target(e); | 1601 | t = compat_ipt_get_target(e); |
| 1644 | target = t->u.kernel.target; | 1602 | target = t->u.kernel.target; |
| @@ -1655,36 +1613,43 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, | |||
| 1655 | } | 1613 | } |
| 1656 | 1614 | ||
| 1657 | static int | 1615 | static int |
| 1658 | compat_check_entry(struct ipt_entry *e, const char *name, | 1616 | compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) |
| 1659 | unsigned int *i) | ||
| 1660 | { | 1617 | { |
| 1618 | struct xt_entry_match *ematch; | ||
| 1661 | struct xt_mtchk_param mtpar; | 1619 | struct xt_mtchk_param mtpar; |
| 1662 | unsigned int j; | 1620 | unsigned int j; |
| 1663 | int ret; | 1621 | int ret = 0; |
| 1664 | 1622 | ||
| 1665 | j = 0; | 1623 | j = 0; |
| 1624 | mtpar.net = net; | ||
| 1666 | mtpar.table = name; | 1625 | mtpar.table = name; |
| 1667 | mtpar.entryinfo = &e->ip; | 1626 | mtpar.entryinfo = &e->ip; |
| 1668 | mtpar.hook_mask = e->comefrom; | 1627 | mtpar.hook_mask = e->comefrom; |
| 1669 | mtpar.family = NFPROTO_IPV4; | 1628 | mtpar.family = NFPROTO_IPV4; |
| 1670 | ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); | 1629 | xt_ematch_foreach(ematch, e) { |
| 1671 | if (ret) | 1630 | ret = check_match(ematch, &mtpar); |
| 1672 | goto cleanup_matches; | 1631 | if (ret != 0) |
| 1632 | goto cleanup_matches; | ||
| 1633 | ++j; | ||
| 1634 | } | ||
| 1673 | 1635 | ||
| 1674 | ret = check_target(e, name); | 1636 | ret = check_target(e, net, name); |
| 1675 | if (ret) | 1637 | if (ret) |
| 1676 | goto cleanup_matches; | 1638 | goto cleanup_matches; |
| 1677 | |||
| 1678 | (*i)++; | ||
| 1679 | return 0; | 1639 | return 0; |
| 1680 | 1640 | ||
| 1681 | cleanup_matches: | 1641 | cleanup_matches: |
| 1682 | IPT_MATCH_ITERATE(e, cleanup_match, &j); | 1642 | xt_ematch_foreach(ematch, e) { |
| 1643 | if (j-- == 0) | ||
| 1644 | break; | ||
| 1645 | cleanup_match(ematch, net); | ||
| 1646 | } | ||
| 1683 | return ret; | 1647 | return ret; |
| 1684 | } | 1648 | } |
| 1685 | 1649 | ||
| 1686 | static int | 1650 | static int |
| 1687 | translate_compat_table(const char *name, | 1651 | translate_compat_table(struct net *net, |
| 1652 | const char *name, | ||
| 1688 | unsigned int valid_hooks, | 1653 | unsigned int valid_hooks, |
| 1689 | struct xt_table_info **pinfo, | 1654 | struct xt_table_info **pinfo, |
| 1690 | void **pentry0, | 1655 | void **pentry0, |
| @@ -1696,6 +1661,8 @@ translate_compat_table(const char *name, | |||
| 1696 | unsigned int i, j; | 1661 | unsigned int i, j; |
| 1697 | struct xt_table_info *newinfo, *info; | 1662 | struct xt_table_info *newinfo, *info; |
| 1698 | void *pos, *entry0, *entry1; | 1663 | void *pos, *entry0, *entry1; |
| 1664 | struct compat_ipt_entry *iter0; | ||
| 1665 | struct ipt_entry *iter1; | ||
| 1699 | unsigned int size; | 1666 | unsigned int size; |
| 1700 | int ret; | 1667 | int ret; |
| 1701 | 1668 | ||
| @@ -1714,13 +1681,17 @@ translate_compat_table(const char *name, | |||
| 1714 | j = 0; | 1681 | j = 0; |
| 1715 | xt_compat_lock(AF_INET); | 1682 | xt_compat_lock(AF_INET); |
| 1716 | /* Walk through entries, checking offsets. */ | 1683 | /* Walk through entries, checking offsets. */ |
| 1717 | ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, | 1684 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1718 | check_compat_entry_size_and_hooks, | 1685 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
| 1719 | info, &size, entry0, | 1686 | entry0, |
| 1720 | entry0 + total_size, | 1687 | entry0 + total_size, |
| 1721 | hook_entries, underflows, &j, name); | 1688 | hook_entries, |
| 1722 | if (ret != 0) | 1689 | underflows, |
| 1723 | goto out_unlock; | 1690 | name); |
| 1691 | if (ret != 0) | ||
| 1692 | goto out_unlock; | ||
| 1693 | ++j; | ||
| 1694 | } | ||
| 1724 | 1695 | ||
| 1725 | ret = -EINVAL; | 1696 | ret = -EINVAL; |
| 1726 | if (j != number) { | 1697 | if (j != number) { |
| @@ -1759,9 +1730,12 @@ translate_compat_table(const char *name, | |||
| 1759 | entry1 = newinfo->entries[raw_smp_processor_id()]; | 1730 | entry1 = newinfo->entries[raw_smp_processor_id()]; |
| 1760 | pos = entry1; | 1731 | pos = entry1; |
| 1761 | size = total_size; | 1732 | size = total_size; |
| 1762 | ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, | 1733 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1763 | compat_copy_entry_from_user, | 1734 | ret = compat_copy_entry_from_user(iter0, &pos, &size, |
| 1764 | &pos, &size, name, newinfo, entry1); | 1735 | name, newinfo, entry1); |
| 1736 | if (ret != 0) | ||
| 1737 | break; | ||
| 1738 | } | ||
| 1765 | xt_compat_flush_offsets(AF_INET); | 1739 | xt_compat_flush_offsets(AF_INET); |
| 1766 | xt_compat_unlock(AF_INET); | 1740 | xt_compat_unlock(AF_INET); |
| 1767 | if (ret) | 1741 | if (ret) |
| @@ -1772,13 +1746,35 @@ translate_compat_table(const char *name, | |||
| 1772 | goto free_newinfo; | 1746 | goto free_newinfo; |
| 1773 | 1747 | ||
| 1774 | i = 0; | 1748 | i = 0; |
| 1775 | ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, | 1749 | xt_entry_foreach(iter1, entry1, newinfo->size) { |
| 1776 | name, &i); | 1750 | ret = compat_check_entry(iter1, net, name); |
| 1751 | if (ret != 0) | ||
| 1752 | break; | ||
| 1753 | ++i; | ||
| 1754 | if (strcmp(ipt_get_target(iter1)->u.user.name, | ||
| 1755 | XT_ERROR_TARGET) == 0) | ||
| 1756 | ++newinfo->stacksize; | ||
| 1757 | } | ||
| 1777 | if (ret) { | 1758 | if (ret) { |
| 1759 | /* | ||
| 1760 | * The first i matches need cleanup_entry (calls ->destroy) | ||
| 1761 | * because they had called ->check already. The other j-i | ||
| 1762 | * entries need only release. | ||
| 1763 | */ | ||
| 1764 | int skip = i; | ||
| 1778 | j -= i; | 1765 | j -= i; |
| 1779 | COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, | 1766 | xt_entry_foreach(iter0, entry0, newinfo->size) { |
| 1780 | compat_release_entry, &j); | 1767 | if (skip-- > 0) |
| 1781 | IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); | 1768 | continue; |
| 1769 | if (j-- == 0) | ||
| 1770 | break; | ||
| 1771 | compat_release_entry(iter0); | ||
| 1772 | } | ||
| 1773 | xt_entry_foreach(iter1, entry1, newinfo->size) { | ||
| 1774 | if (i-- == 0) | ||
| 1775 | break; | ||
| 1776 | cleanup_entry(iter1, net); | ||
| 1777 | } | ||
| 1782 | xt_free_table_info(newinfo); | 1778 | xt_free_table_info(newinfo); |
| 1783 | return ret; | 1779 | return ret; |
| 1784 | } | 1780 | } |
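The unwind in the hunk above is easy to misread, so restated: j entries had their match/target modules looked up, but only the first i of them also passed ->check. Those i need the full cleanup_entry() (which invokes ->destroy); the trailing j-i only need their module references dropped. A distilled sketch of the same skip/count idiom (variable names here are illustrative; the code uses i, j and skip):

	int skip = checked;			/* entries that passed ->check */
	int release = looked_up - checked;	/* the rest: drop refs only */

	xt_entry_foreach(iter0, entry0, size) {
		if (skip-- > 0)			/* checked prefix handled below */
			continue;
		if (release-- == 0)
			break;
		compat_release_entry(iter0);	/* module refs only */
	}
	xt_entry_foreach(iter1, entry1, size) {
		if (checked-- == 0)
			break;
		cleanup_entry(iter1, net);	/* full ->destroy teardown */
	}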
| @@ -1796,7 +1792,11 @@ translate_compat_table(const char *name, | |||
| 1796 | free_newinfo: | 1792 | free_newinfo: |
| 1797 | xt_free_table_info(newinfo); | 1793 | xt_free_table_info(newinfo); |
| 1798 | out: | 1794 | out: |
| 1799 | COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); | 1795 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1796 | if (j-- == 0) | ||
| 1797 | break; | ||
| 1798 | compat_release_entry(iter0); | ||
| 1799 | } | ||
| 1800 | return ret; | 1800 | return ret; |
| 1801 | out_unlock: | 1801 | out_unlock: |
| 1802 | xt_compat_flush_offsets(AF_INET); | 1802 | xt_compat_flush_offsets(AF_INET); |
| @@ -1811,6 +1811,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1811 | struct compat_ipt_replace tmp; | 1811 | struct compat_ipt_replace tmp; |
| 1812 | struct xt_table_info *newinfo; | 1812 | struct xt_table_info *newinfo; |
| 1813 | void *loc_cpu_entry; | 1813 | void *loc_cpu_entry; |
| 1814 | struct ipt_entry *iter; | ||
| 1814 | 1815 | ||
| 1815 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1816 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
| 1816 | return -EFAULT; | 1817 | return -EFAULT; |
| @@ -1833,7 +1834,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1833 | goto free_newinfo; | 1834 | goto free_newinfo; |
| 1834 | } | 1835 | } |
| 1835 | 1836 | ||
| 1836 | ret = translate_compat_table(tmp.name, tmp.valid_hooks, | 1837 | ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, |
| 1837 | &newinfo, &loc_cpu_entry, tmp.size, | 1838 | &newinfo, &loc_cpu_entry, tmp.size, |
| 1838 | tmp.num_entries, tmp.hook_entry, | 1839 | tmp.num_entries, tmp.hook_entry, |
| 1839 | tmp.underflow); | 1840 | tmp.underflow); |
| @@ -1849,7 +1850,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1849 | return 0; | 1850 | return 0; |
| 1850 | 1851 | ||
| 1851 | free_newinfo_untrans: | 1852 | free_newinfo_untrans: |
| 1852 | IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); | 1853 | xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) |
| 1854 | cleanup_entry(iter, net); | ||
| 1853 | free_newinfo: | 1855 | free_newinfo: |
| 1854 | xt_free_table_info(newinfo); | 1856 | xt_free_table_info(newinfo); |
| 1855 | return ret; | 1857 | return ret; |
| @@ -1898,6 +1900,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, | |||
| 1898 | int ret = 0; | 1900 | int ret = 0; |
| 1899 | const void *loc_cpu_entry; | 1901 | const void *loc_cpu_entry; |
| 1900 | unsigned int i = 0; | 1902 | unsigned int i = 0; |
| 1903 | struct ipt_entry *iter; | ||
| 1901 | 1904 | ||
| 1902 | counters = alloc_counters(table); | 1905 | counters = alloc_counters(table); |
| 1903 | if (IS_ERR(counters)) | 1906 | if (IS_ERR(counters)) |
| @@ -1910,9 +1913,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, | |||
| 1910 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1913 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
| 1911 | pos = userptr; | 1914 | pos = userptr; |
| 1912 | size = total_size; | 1915 | size = total_size; |
| 1913 | ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, | 1916 | xt_entry_foreach(iter, loc_cpu_entry, total_size) { |
| 1914 | compat_copy_entry_to_user, | 1917 | ret = compat_copy_entry_to_user(iter, &pos, |
| 1915 | &pos, &size, counters, &i); | 1918 | &size, counters, i++); |
| 1919 | if (ret != 0) | ||
| 1920 | break; | ||
| 1921 | } | ||
| 1916 | 1922 | ||
| 1917 | vfree(counters); | 1923 | vfree(counters); |
| 1918 | return ret; | 1924 | return ret; |
| @@ -2071,8 +2077,7 @@ struct xt_table *ipt_register_table(struct net *net, | |||
| 2071 | { | 2077 | { |
| 2072 | int ret; | 2078 | int ret; |
| 2073 | struct xt_table_info *newinfo; | 2079 | struct xt_table_info *newinfo; |
| 2074 | struct xt_table_info bootstrap | 2080 | struct xt_table_info bootstrap = {0}; |
| 2075 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | ||
| 2076 | void *loc_cpu_entry; | 2081 | void *loc_cpu_entry; |
| 2077 | struct xt_table *new_table; | 2082 | struct xt_table *new_table; |
| 2078 | 2083 | ||
| @@ -2086,11 +2091,7 @@ struct xt_table *ipt_register_table(struct net *net, | |||
| 2086 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | 2091 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; |
| 2087 | memcpy(loc_cpu_entry, repl->entries, repl->size); | 2092 | memcpy(loc_cpu_entry, repl->entries, repl->size); |
| 2088 | 2093 | ||
| 2089 | ret = translate_table(table->name, table->valid_hooks, | 2094 | ret = translate_table(net, newinfo, loc_cpu_entry, repl); |
| 2090 | newinfo, loc_cpu_entry, repl->size, | ||
| 2091 | repl->num_entries, | ||
| 2092 | repl->hook_entry, | ||
| 2093 | repl->underflow); | ||
| 2094 | if (ret != 0) | 2095 | if (ret != 0) |
| 2095 | goto out_free; | 2096 | goto out_free; |
| 2096 | 2097 | ||
| @@ -2108,17 +2109,19 @@ out: | |||
| 2108 | return ERR_PTR(ret); | 2109 | return ERR_PTR(ret); |
| 2109 | } | 2110 | } |
| 2110 | 2111 | ||
| 2111 | void ipt_unregister_table(struct xt_table *table) | 2112 | void ipt_unregister_table(struct net *net, struct xt_table *table) |
| 2112 | { | 2113 | { |
| 2113 | struct xt_table_info *private; | 2114 | struct xt_table_info *private; |
| 2114 | void *loc_cpu_entry; | 2115 | void *loc_cpu_entry; |
| 2115 | struct module *table_owner = table->me; | 2116 | struct module *table_owner = table->me; |
| 2117 | struct ipt_entry *iter; | ||
| 2116 | 2118 | ||
| 2117 | private = xt_unregister_table(table); | 2119 | private = xt_unregister_table(table); |
| 2118 | 2120 | ||
| 2119 | /* Decrease module usage counts and free resources */ | 2121 | /* Decrease module usage counts and free resources */ |
| 2120 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 2122 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
| 2121 | IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); | 2123 | xt_entry_foreach(iter, loc_cpu_entry, private->size) |
| 2124 | cleanup_entry(iter, net); | ||
| 2122 | if (private->number > private->initial_entries) | 2125 | if (private->number > private->initial_entries) |
| 2123 | module_put(table_owner); | 2126 | module_put(table_owner); |
| 2124 | xt_free_table_info(private); | 2127 | xt_free_table_info(private); |
| @@ -2136,7 +2139,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, | |||
| 2136 | } | 2139 | } |
| 2137 | 2140 | ||
| 2138 | static bool | 2141 | static bool |
| 2139 | icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) | 2142 | icmp_match(const struct sk_buff *skb, struct xt_action_param *par) |
| 2140 | { | 2143 | { |
| 2141 | const struct icmphdr *ic; | 2144 | const struct icmphdr *ic; |
| 2142 | struct icmphdr _icmph; | 2145 | struct icmphdr _icmph; |
| @@ -2152,7 +2155,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 2152 | * can't. Hence, no choice but to drop. | 2155 | * can't. Hence, no choice but to drop. |
| 2153 | */ | 2156 | */ |
| 2154 | duprintf("Dropping evil ICMP tinygram.\n"); | 2157 | duprintf("Dropping evil ICMP tinygram.\n"); |
| 2155 | *par->hotdrop = true; | 2158 | par->hotdrop = true; |
| 2156 | return false; | 2159 | return false; |
| 2157 | } | 2160 | } |
| 2158 | 2161 | ||
| @@ -2163,31 +2166,31 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 2163 | !!(icmpinfo->invflags&IPT_ICMP_INV)); | 2166 | !!(icmpinfo->invflags&IPT_ICMP_INV)); |
| 2164 | } | 2167 | } |
| 2165 | 2168 | ||
| 2166 | static bool icmp_checkentry(const struct xt_mtchk_param *par) | 2169 | static int icmp_checkentry(const struct xt_mtchk_param *par) |
| 2167 | { | 2170 | { |
| 2168 | const struct ipt_icmp *icmpinfo = par->matchinfo; | 2171 | const struct ipt_icmp *icmpinfo = par->matchinfo; |
| 2169 | 2172 | ||
| 2170 | /* Must specify no unknown invflags */ | 2173 | /* Must specify no unknown invflags */ |
| 2171 | return !(icmpinfo->invflags & ~IPT_ICMP_INV); | 2174 | return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; |
| 2172 | } | 2175 | } |
| 2173 | 2176 | ||
| 2174 | /* The built-in targets: standard (NULL) and error. */ | 2177 | static struct xt_target ipt_builtin_tg[] __read_mostly = { |
| 2175 | static struct xt_target ipt_standard_target __read_mostly = { | 2178 | { |
| 2176 | .name = IPT_STANDARD_TARGET, | 2179 | .name = IPT_STANDARD_TARGET, |
| 2177 | .targetsize = sizeof(int), | 2180 | .targetsize = sizeof(int), |
| 2178 | .family = NFPROTO_IPV4, | 2181 | .family = NFPROTO_IPV4, |
| 2179 | #ifdef CONFIG_COMPAT | 2182 | #ifdef CONFIG_COMPAT |
| 2180 | .compatsize = sizeof(compat_int_t), | 2183 | .compatsize = sizeof(compat_int_t), |
| 2181 | .compat_from_user = compat_standard_from_user, | 2184 | .compat_from_user = compat_standard_from_user, |
| 2182 | .compat_to_user = compat_standard_to_user, | 2185 | .compat_to_user = compat_standard_to_user, |
| 2183 | #endif | 2186 | #endif |
| 2184 | }; | 2187 | }, |
| 2185 | 2188 | { | |
| 2186 | static struct xt_target ipt_error_target __read_mostly = { | 2189 | .name = IPT_ERROR_TARGET, |
| 2187 | .name = IPT_ERROR_TARGET, | 2190 | .target = ipt_error, |
| 2188 | .target = ipt_error, | 2191 | .targetsize = IPT_FUNCTION_MAXNAMELEN, |
| 2189 | .targetsize = IPT_FUNCTION_MAXNAMELEN, | 2192 | .family = NFPROTO_IPV4, |
| 2190 | .family = NFPROTO_IPV4, | 2193 | }, |
| 2191 | }; | 2194 | }; |
| 2192 | 2195 | ||
| 2193 | static struct nf_sockopt_ops ipt_sockopts = { | 2196 | static struct nf_sockopt_ops ipt_sockopts = { |
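Also visible in the hunk above: ->checkentry now returns 0 or a negative errno instead of a bool, so a failing check can report why it failed. A hypothetical match check in the new convention (names invented for illustration):

	static int my_mt_check(const struct xt_mtchk_param *par)
	{
		const struct ipt_icmp *info = par->matchinfo;

		if (info->invflags & ~IPT_ICMP_INV)
			return -EINVAL;		/* was: return false; */
		return 0;			/* was: return true;  */
	}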
| @@ -2207,13 +2210,15 @@ static struct nf_sockopt_ops ipt_sockopts = { | |||
| 2207 | .owner = THIS_MODULE, | 2210 | .owner = THIS_MODULE, |
| 2208 | }; | 2211 | }; |
| 2209 | 2212 | ||
| 2210 | static struct xt_match icmp_matchstruct __read_mostly = { | 2213 | static struct xt_match ipt_builtin_mt[] __read_mostly = { |
| 2211 | .name = "icmp", | 2214 | { |
| 2212 | .match = icmp_match, | 2215 | .name = "icmp", |
| 2213 | .matchsize = sizeof(struct ipt_icmp), | 2216 | .match = icmp_match, |
| 2214 | .checkentry = icmp_checkentry, | 2217 | .matchsize = sizeof(struct ipt_icmp), |
| 2215 | .proto = IPPROTO_ICMP, | 2218 | .checkentry = icmp_checkentry, |
| 2216 | .family = NFPROTO_IPV4, | 2219 | .proto = IPPROTO_ICMP, |
| 2220 | .family = NFPROTO_IPV4, | ||
| 2221 | }, | ||
| 2217 | }; | 2222 | }; |
| 2218 | 2223 | ||
| 2219 | static int __net_init ip_tables_net_init(struct net *net) | 2224 | static int __net_init ip_tables_net_init(struct net *net) |
| @@ -2240,13 +2245,10 @@ static int __init ip_tables_init(void) | |||
| 2240 | goto err1; | 2245 | goto err1; |
| 2241 | 2246 | ||
| 2242 | /* Noone else will be downing sem now, so we won't sleep */ | 2247 | /* Noone else will be downing sem now, so we won't sleep */ |
| 2243 | ret = xt_register_target(&ipt_standard_target); | 2248 | ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); |
| 2244 | if (ret < 0) | 2249 | if (ret < 0) |
| 2245 | goto err2; | 2250 | goto err2; |
| 2246 | ret = xt_register_target(&ipt_error_target); | 2251 | ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); |
| 2247 | if (ret < 0) | ||
| 2248 | goto err3; | ||
| 2249 | ret = xt_register_match(&icmp_matchstruct); | ||
| 2250 | if (ret < 0) | 2252 | if (ret < 0) |
| 2251 | goto err4; | 2253 | goto err4; |
| 2252 | 2254 | ||
| @@ -2255,15 +2257,13 @@ static int __init ip_tables_init(void) | |||
| 2255 | if (ret < 0) | 2257 | if (ret < 0) |
| 2256 | goto err5; | 2258 | goto err5; |
| 2257 | 2259 | ||
| 2258 | printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n"); | 2260 | pr_info("(C) 2000-2006 Netfilter Core Team\n"); |
| 2259 | return 0; | 2261 | return 0; |
| 2260 | 2262 | ||
| 2261 | err5: | 2263 | err5: |
| 2262 | xt_unregister_match(&icmp_matchstruct); | 2264 | xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); |
| 2263 | err4: | 2265 | err4: |
| 2264 | xt_unregister_target(&ipt_error_target); | 2266 | xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); |
| 2265 | err3: | ||
| 2266 | xt_unregister_target(&ipt_standard_target); | ||
| 2267 | err2: | 2267 | err2: |
| 2268 | unregister_pernet_subsys(&ip_tables_net_ops); | 2268 | unregister_pernet_subsys(&ip_tables_net_ops); |
| 2269 | err1: | 2269 | err1: |
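Collapsing the built-ins into arrays means one xt_register_targets()/xt_register_matches() call each, and the error unwind loses a label with every pair it merges. The general registration shape, with illustrative label names:

	/* Sketch of batch registration with goto unwind: */
	ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
	if (ret < 0)
		goto err_pernet;
	ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
	if (ret < 0)
		goto err_targets;
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0)
		goto err_matches;
	return 0;

	err_matches:
		xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
	err_targets:
		xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
	err_pernet:
		unregister_pernet_subsys(&ip_tables_net_ops);
		return ret;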
| @@ -2274,10 +2274,8 @@ static void __exit ip_tables_fini(void) | |||
| 2274 | { | 2274 | { |
| 2275 | nf_unregister_sockopt(&ipt_sockopts); | 2275 | nf_unregister_sockopt(&ipt_sockopts); |
| 2276 | 2276 | ||
| 2277 | xt_unregister_match(&icmp_matchstruct); | 2277 | xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); |
| 2278 | xt_unregister_target(&ipt_error_target); | 2278 | xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); |
| 2279 | xt_unregister_target(&ipt_standard_target); | ||
| 2280 | |||
| 2281 | unregister_pernet_subsys(&ip_tables_net_ops); | 2279 | unregister_pernet_subsys(&ip_tables_net_ops); |
| 2282 | } | 2280 | } |
| 2283 | 2281 | ||
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 40ca2d240abb..3a43cf36db87 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
| @@ -9,11 +9,13 @@ | |||
| 9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
| 10 | * | 10 | * |
| 11 | */ | 11 | */ |
| 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 12 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 13 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |
| 14 | #include <linux/jhash.h> | 15 | #include <linux/jhash.h> |
| 15 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
| 16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
| 18 | #include <linux/slab.h> | ||
| 17 | #include <linux/ip.h> | 19 | #include <linux/ip.h> |
| 18 | #include <linux/tcp.h> | 20 | #include <linux/tcp.h> |
| 19 | #include <linux/udp.h> | 21 | #include <linux/udp.h> |
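The pr_fmt define, placed before any include that pulls in printk, is what lets the rest of this file drop its hand-rolled "CLUSTERIP: " prefixes: the pr_info()/pr_err() family routes every format string through pr_fmt(). Roughly:

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* must precede the includes */

	pr_info("unknown mode %u\n", mode);
	/* expands to approximately:
	 *   printk(KERN_INFO "ipt_CLUSTERIP" ": " "unknown mode %u\n", mode);
	 */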
| @@ -51,12 +53,13 @@ struct clusterip_config { | |||
| 51 | #endif | 53 | #endif |
| 52 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | 54 | enum clusterip_hashmode hash_mode; /* which hashing mode */ |
| 53 | u_int32_t hash_initval; /* hash initialization */ | 55 | u_int32_t hash_initval; /* hash initialization */ |
| 56 | struct rcu_head rcu; | ||
| 54 | }; | 57 | }; |
| 55 | 58 | ||
| 56 | static LIST_HEAD(clusterip_configs); | 59 | static LIST_HEAD(clusterip_configs); |
| 57 | 60 | ||
| 58 | /* clusterip_lock protects the clusterip_configs list */ | 61 | /* clusterip_lock protects the clusterip_configs list */ |
| 59 | static DEFINE_RWLOCK(clusterip_lock); | 62 | static DEFINE_SPINLOCK(clusterip_lock); |
| 60 | 63 | ||
| 61 | #ifdef CONFIG_PROC_FS | 64 | #ifdef CONFIG_PROC_FS |
| 62 | static const struct file_operations clusterip_proc_fops; | 65 | static const struct file_operations clusterip_proc_fops; |
| @@ -69,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c) | |||
| 69 | atomic_inc(&c->refcount); | 72 | atomic_inc(&c->refcount); |
| 70 | } | 73 | } |
| 71 | 74 | ||
| 75 | |||
| 76 | static void clusterip_config_rcu_free(struct rcu_head *head) | ||
| 77 | { | ||
| 78 | kfree(container_of(head, struct clusterip_config, rcu)); | ||
| 79 | } | ||
| 80 | |||
| 72 | static inline void | 81 | static inline void |
| 73 | clusterip_config_put(struct clusterip_config *c) | 82 | clusterip_config_put(struct clusterip_config *c) |
| 74 | { | 83 | { |
| 75 | if (atomic_dec_and_test(&c->refcount)) | 84 | if (atomic_dec_and_test(&c->refcount)) |
| 76 | kfree(c); | 85 | call_rcu_bh(&c->rcu, clusterip_config_rcu_free); |
| 77 | } | 86 | } |
| 78 | 87 | ||
| 79 | /* decrease the count of entries using/referencing this config. If last | 88 | /* decrease the count of entries using/referencing this config. If last |
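With lookups now done under RCU (see the find_get conversion below), the final kfree() must be deferred until every in-flight _bh reader is done: call_rcu_bh() queues the free, and container_of() recovers the config from its embedded rcu_head. The pattern, as used here:

	static void clusterip_config_rcu_free(struct rcu_head *head)
	{
		kfree(container_of(head, struct clusterip_config, rcu));
	}

	if (atomic_dec_and_test(&c->refcount))
		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);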
| @@ -82,12 +91,13 @@ clusterip_config_put(struct clusterip_config *c) | |||
| 82 | static inline void | 91 | static inline void |
| 83 | clusterip_config_entry_put(struct clusterip_config *c) | 92 | clusterip_config_entry_put(struct clusterip_config *c) |
| 84 | { | 93 | { |
| 85 | write_lock_bh(&clusterip_lock); | 94 | local_bh_disable(); |
| 86 | if (atomic_dec_and_test(&c->entries)) { | 95 | if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { |
| 87 | list_del(&c->list); | 96 | list_del_rcu(&c->list); |
| 88 | write_unlock_bh(&clusterip_lock); | 97 | spin_unlock(&clusterip_lock); |
| 98 | local_bh_enable(); | ||
| 89 | 99 | ||
| 90 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); | 100 | dev_mc_del(c->dev, c->clustermac); |
| 91 | dev_put(c->dev); | 101 | dev_put(c->dev); |
| 92 | 102 | ||
| 93 | /* In case anyone still accesses the file, the open/close | 103 | /* In case anyone still accesses the file, the open/close |
| @@ -98,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c) | |||
| 98 | #endif | 108 | #endif |
| 99 | return; | 109 | return; |
| 100 | } | 110 | } |
| 101 | write_unlock_bh(&clusterip_lock); | 111 | local_bh_enable(); |
| 102 | } | 112 | } |
| 103 | 113 | ||
| 104 | static struct clusterip_config * | 114 | static struct clusterip_config * |
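atomic_dec_and_lock() is the primitive doing the work in clusterip_config_entry_put() across the two hunks above: it decrements the count, and only when the count hits zero does it acquire the spinlock and return true, so the common non-final put never touches the lock at all. Schematically:

	local_bh_disable();
	if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
		/* count reached zero; we now hold clusterip_lock */
		list_del_rcu(&c->list);
		spin_unlock(&clusterip_lock);
		local_bh_enable();
		/* ...release the device and /proc resources... */
	} else {
		local_bh_enable();	/* non-final put: lock never taken */
	}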
| @@ -106,7 +116,7 @@ __clusterip_config_find(__be32 clusterip) | |||
| 106 | { | 116 | { |
| 107 | struct clusterip_config *c; | 117 | struct clusterip_config *c; |
| 108 | 118 | ||
| 109 | list_for_each_entry(c, &clusterip_configs, list) { | 119 | list_for_each_entry_rcu(c, &clusterip_configs, list) { |
| 110 | if (c->clusterip == clusterip) | 120 | if (c->clusterip == clusterip) |
| 111 | return c; | 121 | return c; |
| 112 | } | 122 | } |
| @@ -119,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry) | |||
| 119 | { | 129 | { |
| 120 | struct clusterip_config *c; | 130 | struct clusterip_config *c; |
| 121 | 131 | ||
| 122 | read_lock_bh(&clusterip_lock); | 132 | rcu_read_lock_bh(); |
| 123 | c = __clusterip_config_find(clusterip); | 133 | c = __clusterip_config_find(clusterip); |
| 124 | if (!c) { | 134 | if (c) { |
| 125 | read_unlock_bh(&clusterip_lock); | 135 | if (unlikely(!atomic_inc_not_zero(&c->refcount))) |
| 126 | return NULL; | 136 | c = NULL; |
| 137 | else if (entry) | ||
| 138 | atomic_inc(&c->entries); | ||
| 127 | } | 139 | } |
| 128 | atomic_inc(&c->refcount); | 140 | rcu_read_unlock_bh(); |
| 129 | if (entry) | ||
| 130 | atomic_inc(&c->entries); | ||
| 131 | read_unlock_bh(&clusterip_lock); | ||
| 132 | 141 | ||
| 133 | return c; | 142 | return c; |
| 134 | } | 143 | } |
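This is the standard RCU lookup-and-reference idiom: find the object inside rcu_read_lock_bh(), then take a reference with atomic_inc_not_zero(), which fails if the refcount has already dropped to zero, i.e. if the lookup raced with the final put and the object is on its way to being freed:

	rcu_read_lock_bh();
	c = __clusterip_config_find(clusterip);
	if (c) {
		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
			c = NULL;	/* lost the race with the last put */
		else if (entry)
			atomic_inc(&c->entries);
	}
	rcu_read_unlock_bh();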
| @@ -179,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, | |||
| 179 | } | 188 | } |
| 180 | #endif | 189 | #endif |
| 181 | 190 | ||
| 182 | write_lock_bh(&clusterip_lock); | 191 | spin_lock_bh(&clusterip_lock); |
| 183 | list_add(&c->list, &clusterip_configs); | 192 | list_add_rcu(&c->list, &clusterip_configs); |
| 184 | write_unlock_bh(&clusterip_lock); | 193 | spin_unlock_bh(&clusterip_lock); |
| 185 | 194 | ||
| 186 | return c; | 195 | return c; |
| 187 | } | 196 | } |
| @@ -238,8 +247,7 @@ clusterip_hashfn(const struct sk_buff *skb, | |||
| 238 | break; | 247 | break; |
| 239 | default: | 248 | default: |
| 240 | if (net_ratelimit()) | 249 | if (net_ratelimit()) |
| 241 | printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", | 250 | pr_info("unknown protocol %u\n", iph->protocol); |
| 242 | iph->protocol); | ||
| 243 | sport = dport = 0; | 251 | sport = dport = 0; |
| 244 | } | 252 | } |
| 245 | 253 | ||
| @@ -261,7 +269,7 @@ clusterip_hashfn(const struct sk_buff *skb, | |||
| 261 | hashval = 0; | 269 | hashval = 0; |
| 262 | /* This cannot happen, unless the check function wasn't called | 270 | /* This cannot happen, unless the check function wasn't called |
| 263 | * at rule load time */ | 271 | * at rule load time */ |
| 264 | printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); | 272 | pr_info("unknown mode %u\n", config->hash_mode); |
| 265 | BUG(); | 273 | BUG(); |
| 266 | break; | 274 | break; |
| 267 | } | 275 | } |
| @@ -281,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) | |||
| 281 | ***********************************************************************/ | 289 | ***********************************************************************/ |
| 282 | 290 | ||
| 283 | static unsigned int | 291 | static unsigned int |
| 284 | clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) | 292 | clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 285 | { | 293 | { |
| 286 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; | 294 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
| 287 | struct nf_conn *ct; | 295 | struct nf_conn *ct; |
| @@ -294,7 +302,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 294 | 302 | ||
| 295 | ct = nf_ct_get(skb, &ctinfo); | 303 | ct = nf_ct_get(skb, &ctinfo); |
| 296 | if (ct == NULL) { | 304 | if (ct == NULL) { |
| 297 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); | 305 | pr_info("no conntrack!\n"); |
| 298 | /* FIXME: need to drop invalid ones, since replies | 306 | /* FIXME: need to drop invalid ones, since replies |
| 299 | * to outgoing connections of other nodes will be | 307 | * to outgoing connections of other nodes will be |
| 300 | * marked as INVALID */ | 308 | * marked as INVALID */ |
| @@ -347,25 +355,24 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 347 | return XT_CONTINUE; | 355 | return XT_CONTINUE; |
| 348 | } | 356 | } |
| 349 | 357 | ||
| 350 | static bool clusterip_tg_check(const struct xt_tgchk_param *par) | 358 | static int clusterip_tg_check(const struct xt_tgchk_param *par) |
| 351 | { | 359 | { |
| 352 | struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; | 360 | struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
| 353 | const struct ipt_entry *e = par->entryinfo; | 361 | const struct ipt_entry *e = par->entryinfo; |
| 354 | |||
| 355 | struct clusterip_config *config; | 362 | struct clusterip_config *config; |
| 363 | int ret; | ||
| 356 | 364 | ||
| 357 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && | 365 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && |
| 358 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | 366 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && |
| 359 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | 367 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { |
| 360 | printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", | 368 | pr_info("unknown mode %u\n", cipinfo->hash_mode); |
| 361 | cipinfo->hash_mode); | 369 | return -EINVAL; |
| 362 | return false; | ||
| 363 | 370 | ||
| 364 | } | 371 | } |
| 365 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) || | 372 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) || |
| 366 | e->ip.dst.s_addr == 0) { | 373 | e->ip.dst.s_addr == 0) { |
| 367 | printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); | 374 | pr_info("Please specify destination IP\n"); |
| 368 | return false; | 375 | return -EINVAL; |
| 369 | } | 376 | } |
| 370 | 377 | ||
| 371 | /* FIXME: further sanity checks */ | 378 | /* FIXME: further sanity checks */ |
| @@ -373,41 +380,41 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) | |||
| 373 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); | 380 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
| 374 | if (!config) { | 381 | if (!config) { |
| 375 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { | 382 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
| 376 | printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); | 383 | pr_info("no config found for %pI4, need 'new'\n", |
| 377 | return false; | 384 | &e->ip.dst.s_addr); |
| 385 | return -EINVAL; | ||
| 378 | } else { | 386 | } else { |
| 379 | struct net_device *dev; | 387 | struct net_device *dev; |
| 380 | 388 | ||
| 381 | if (e->ip.iniface[0] == '\0') { | 389 | if (e->ip.iniface[0] == '\0') { |
| 382 | printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); | 390 | pr_info("Please specify an interface name\n"); |
| 383 | return false; | 391 | return -EINVAL; |
| 384 | } | 392 | } |
| 385 | 393 | ||
| 386 | dev = dev_get_by_name(&init_net, e->ip.iniface); | 394 | dev = dev_get_by_name(&init_net, e->ip.iniface); |
| 387 | if (!dev) { | 395 | if (!dev) { |
| 388 | printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); | 396 | pr_info("no such interface %s\n", |
| 389 | return false; | 397 | e->ip.iniface); |
| 398 | return -ENOENT; | ||
| 390 | } | 399 | } |
| 391 | 400 | ||
| 392 | config = clusterip_config_init(cipinfo, | 401 | config = clusterip_config_init(cipinfo, |
| 393 | e->ip.dst.s_addr, dev); | 402 | e->ip.dst.s_addr, dev); |
| 394 | if (!config) { | 403 | if (!config) { |
| 395 | printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); | 404 | pr_info("cannot allocate config\n"); |
| 396 | dev_put(dev); | 405 | dev_put(dev); |
| 397 | return false; | 406 | return -ENOMEM; |
| 398 | } | 407 | } |
| 399 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | 408 | dev_mc_add(config->dev, config->clustermac); |
| 400 | } | 409 | } |
| 401 | } | 410 | } |
| 402 | cipinfo->config = config; | 411 | cipinfo->config = config; |
| 403 | 412 | ||
| 404 | if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { | 413 | ret = nf_ct_l3proto_try_module_get(par->family); |
| 405 | printk(KERN_WARNING "can't load conntrack support for " | 414 | if (ret < 0) |
| 406 | "proto=%u\n", par->target->family); | 415 | pr_info("cannot load conntrack support for proto=%u\n", |
| 407 | return false; | 416 | par->family); |
| 408 | } | 417 | return ret; |
| 409 | |||
| 410 | return true; | ||
| 411 | } | 418 | } |
| 412 | 419 | ||
| 413 | /* drop reference count of cluster config when rule is deleted */ | 420 | /* drop reference count of cluster config when rule is deleted */ |
| @@ -421,7 +428,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) | |||
| 421 | 428 | ||
| 422 | clusterip_config_put(cipinfo->config); | 429 | clusterip_config_put(cipinfo->config); |
| 423 | 430 | ||
| 424 | nf_ct_l3proto_module_put(par->target->family); | 431 | nf_ct_l3proto_module_put(par->family); |
| 425 | } | 432 | } |
| 426 | 433 | ||
| 427 | #ifdef CONFIG_COMPAT | 434 | #ifdef CONFIG_COMPAT |
| @@ -462,7 +469,7 @@ struct arp_payload { | |||
| 462 | __be32 src_ip; | 469 | __be32 src_ip; |
| 463 | u_int8_t dst_hw[ETH_ALEN]; | 470 | u_int8_t dst_hw[ETH_ALEN]; |
| 464 | __be32 dst_ip; | 471 | __be32 dst_ip; |
| 465 | } __attribute__ ((packed)); | 472 | } __packed; |
| 466 | 473 | ||
| 467 | #ifdef DEBUG | 474 | #ifdef DEBUG |
| 468 | static void arp_print(struct arp_payload *payload) | 475 | static void arp_print(struct arp_payload *payload) |
| @@ -478,8 +485,8 @@ static void arp_print(struct arp_payload *payload) | |||
| 478 | } | 485 | } |
| 479 | hbuffer[--k]='\0'; | 486 | hbuffer[--k]='\0'; |
| 480 | 487 | ||
| 481 | printk("src %pI4@%s, dst %pI4\n", | 488 | pr_debug("src %pI4@%s, dst %pI4\n", |
| 482 | &payload->src_ip, hbuffer, &payload->dst_ip); | 489 | &payload->src_ip, hbuffer, &payload->dst_ip); |
| 483 | } | 490 | } |
| 484 | #endif | 491 | #endif |
| 485 | 492 | ||
| @@ -518,7 +525,7 @@ arp_mangle(unsigned int hook, | |||
| 518 | * this wouldn't work, since we didn't subscribe the mcast group on | 525 | * this wouldn't work, since we didn't subscribe the mcast group on |
| 519 | * other interfaces */ | 526 | * other interfaces */ |
| 520 | if (c->dev != out) { | 527 | if (c->dev != out) { |
| 521 | pr_debug("CLUSTERIP: not mangling arp reply on different " | 528 | pr_debug("not mangling arp reply on different " |
| 522 | "interface: cip'%s'-skb'%s'\n", | 529 | "interface: cip'%s'-skb'%s'\n", |
| 523 | c->dev->name, out->name); | 530 | c->dev->name, out->name); |
| 524 | clusterip_config_put(c); | 531 | clusterip_config_put(c); |
| @@ -529,7 +536,7 @@ arp_mangle(unsigned int hook, | |||
| 529 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | 536 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); |
| 530 | 537 | ||
| 531 | #ifdef DEBUG | 538 | #ifdef DEBUG |
| 532 | pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); | 539 | pr_debug("mangled arp reply: "); |
| 533 | arp_print(payload); | 540 | arp_print(payload); |
| 534 | #endif | 541 | #endif |
| 535 | 542 | ||
| @@ -560,8 +567,7 @@ struct clusterip_seq_position { | |||
| 560 | 567 | ||
| 561 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | 568 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
| 562 | { | 569 | { |
| 563 | const struct proc_dir_entry *pde = s->private; | 570 | struct clusterip_config *c = s->private; |
| 564 | struct clusterip_config *c = pde->data; | ||
| 565 | unsigned int weight; | 571 | unsigned int weight; |
| 566 | u_int32_t local_nodes; | 572 | u_int32_t local_nodes; |
| 567 | struct clusterip_seq_position *idx; | 573 | struct clusterip_seq_position *idx; |
| @@ -601,7 +607,8 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |||
| 601 | 607 | ||
| 602 | static void clusterip_seq_stop(struct seq_file *s, void *v) | 608 | static void clusterip_seq_stop(struct seq_file *s, void *v) |
| 603 | { | 609 | { |
| 604 | kfree(v); | 610 | if (!IS_ERR(v)) |
| 611 | kfree(v); | ||
| 605 | } | 612 | } |
| 606 | 613 | ||
| 607 | static int clusterip_seq_show(struct seq_file *s, void *v) | 614 | static int clusterip_seq_show(struct seq_file *s, void *v) |
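The clusterip_seq_stop() change above fixes a latent bug: the ->start() handler can return an ERR_PTR() on allocation failure, and the seq_file core still calls ->stop() with that value, so passing it straight to kfree() would free a bogus pointer. The guard is the whole fix:

	static void clusterip_seq_stop(struct seq_file *s, void *v)
	{
		if (!IS_ERR(v))		/* v may be ERR_PTR(-ENOMEM) from ->start() */
			kfree(v);
	}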
| @@ -632,10 +639,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) | |||
| 632 | 639 | ||
| 633 | if (!ret) { | 640 | if (!ret) { |
| 634 | struct seq_file *sf = file->private_data; | 641 | struct seq_file *sf = file->private_data; |
| 635 | struct proc_dir_entry *pde = PDE(inode); | 642 | struct clusterip_config *c = PDE(inode)->data; |
| 636 | struct clusterip_config *c = pde->data; | ||
| 637 | 643 | ||
| 638 | sf->private = pde; | 644 | sf->private = c; |
| 639 | 645 | ||
| 640 | clusterip_config_get(c); | 646 | clusterip_config_get(c); |
| 641 | } | 647 | } |
| @@ -645,8 +651,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) | |||
| 645 | 651 | ||
| 646 | static int clusterip_proc_release(struct inode *inode, struct file *file) | 652 | static int clusterip_proc_release(struct inode *inode, struct file *file) |
| 647 | { | 653 | { |
| 648 | struct proc_dir_entry *pde = PDE(inode); | 654 | struct clusterip_config *c = PDE(inode)->data; |
| 649 | struct clusterip_config *c = pde->data; | ||
| 650 | int ret; | 655 | int ret; |
| 651 | 656 | ||
| 652 | ret = seq_release(inode, file); | 657 | ret = seq_release(inode, file); |
| @@ -660,10 +665,9 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) | |||
| 660 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | 665 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, |
| 661 | size_t size, loff_t *ofs) | 666 | size_t size, loff_t *ofs) |
| 662 | { | 667 | { |
| 668 | struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; | ||
| 663 | #define PROC_WRITELEN 10 | 669 | #define PROC_WRITELEN 10 |
| 664 | char buffer[PROC_WRITELEN+1]; | 670 | char buffer[PROC_WRITELEN+1]; |
| 665 | const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 666 | struct clusterip_config *c = pde->data; | ||
| 667 | unsigned long nodenum; | 671 | unsigned long nodenum; |
| 668 | 672 | ||
| 669 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | 673 | if (copy_from_user(buffer, input, PROC_WRITELEN)) |
| @@ -709,13 +713,13 @@ static int __init clusterip_tg_init(void) | |||
| 709 | #ifdef CONFIG_PROC_FS | 713 | #ifdef CONFIG_PROC_FS |
| 710 | clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); | 714 | clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); |
| 711 | if (!clusterip_procdir) { | 715 | if (!clusterip_procdir) { |
| 712 | printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); | 716 | pr_err("Unable to proc dir entry\n"); |
| 713 | ret = -ENOMEM; | 717 | ret = -ENOMEM; |
| 714 | goto cleanup_hook; | 718 | goto cleanup_hook; |
| 715 | } | 719 | } |
| 716 | #endif /* CONFIG_PROC_FS */ | 720 | #endif /* CONFIG_PROC_FS */ |
| 717 | 721 | ||
| 718 | printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", | 722 | pr_info("ClusterIP Version %s loaded successfully\n", |
| 719 | CLUSTERIP_VERSION); | 723 | CLUSTERIP_VERSION); |
| 720 | return 0; | 724 | return 0; |
| 721 | 725 | ||
| @@ -730,13 +734,15 @@ cleanup_target: | |||
| 730 | 734 | ||
| 731 | static void __exit clusterip_tg_exit(void) | 735 | static void __exit clusterip_tg_exit(void) |
| 732 | { | 736 | { |
| 733 | printk(KERN_NOTICE "ClusterIP Version %s unloading\n", | 737 | pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); |
| 734 | CLUSTERIP_VERSION); | ||
| 735 | #ifdef CONFIG_PROC_FS | 738 | #ifdef CONFIG_PROC_FS |
| 736 | remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); | 739 | remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); |
| 737 | #endif | 740 | #endif |
| 738 | nf_unregister_hook(&cip_arp_ops); | 741 | nf_unregister_hook(&cip_arp_ops); |
| 739 | xt_unregister_target(&clusterip_tg_reg); | 742 | xt_unregister_target(&clusterip_tg_reg); |
| 743 | |||
| 744 | /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ | ||
| 745 | rcu_barrier_bh(); | ||
| 740 | } | 746 | } |
| 741 | 747 | ||
| 742 | module_init(clusterip_tg_init); | 748 | module_init(clusterip_tg_init); |
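The ipt_CLUSTERIP hunks above carry three independent fixes: the seq_file now stores the clusterip_config directly in s->private instead of reaching through the proc_dir_entry; clusterip_seq_stop() gains an IS_ERR() guard, so an ERR_PTR cookie returned by the ->start handler is never handed to kfree(); and module unload calls rcu_barrier_bh() after unregistering, so every callback queued with call_rcu_bh() has finished before the module text that contains it goes away. A minimal sketch of that last pattern, with demo_* placeholders standing in for the CLUSTERIP symbols:

    /*
     * Teardown ordering the rcu_barrier_bh() hunk relies on; the
     * demo_* names are illustrative, not ipt_CLUSTERIP symbols.
     */
    #include <linux/module.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct demo_config {
            atomic_t refcount;
            struct rcu_head rcu;
    };

    static void demo_config_rcu_free(struct rcu_head *head)
    {
            kfree(container_of(head, struct demo_config, rcu));
    }

    static void demo_config_put(struct demo_config *c)
    {
            if (atomic_dec_and_test(&c->refcount))
                    call_rcu_bh(&c->rcu, demo_config_rcu_free);
    }

    static void __exit demo_exit(void)
    {
            /* unregister hooks/targets first so nothing new is queued,
             * then wait for every pending callback before the module
             * text containing demo_config_rcu_free() can go away */
            rcu_barrier_bh();
    }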
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index ea5cea2415c1..4bf3dc49ad1e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
| 7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
| 8 | */ | 8 | */ |
| 9 | 9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 10 | #include <linux/in.h> | 10 | #include <linux/in.h> |
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
| @@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) | |||
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | static unsigned int | 79 | static unsigned int |
| 80 | ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) | 80 | ecn_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 81 | { | 81 | { |
| 82 | const struct ipt_ECN_info *einfo = par->targinfo; | 82 | const struct ipt_ECN_info *einfo = par->targinfo; |
| 83 | 83 | ||
| @@ -93,28 +93,25 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 93 | return XT_CONTINUE; | 93 | return XT_CONTINUE; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | static bool ecn_tg_check(const struct xt_tgchk_param *par) | 96 | static int ecn_tg_check(const struct xt_tgchk_param *par) |
| 97 | { | 97 | { |
| 98 | const struct ipt_ECN_info *einfo = par->targinfo; | 98 | const struct ipt_ECN_info *einfo = par->targinfo; |
| 99 | const struct ipt_entry *e = par->entryinfo; | 99 | const struct ipt_entry *e = par->entryinfo; |
| 100 | 100 | ||
| 101 | if (einfo->operation & IPT_ECN_OP_MASK) { | 101 | if (einfo->operation & IPT_ECN_OP_MASK) { |
| 102 | printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", | 102 | pr_info("unsupported ECN operation %x\n", einfo->operation); |
| 103 | einfo->operation); | 103 | return -EINVAL; |
| 104 | return false; | ||
| 105 | } | 104 | } |
| 106 | if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { | 105 | if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { |
| 107 | printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n", | 106 | pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); |
| 108 | einfo->ip_ect); | 107 | return -EINVAL; |
| 109 | return false; | ||
| 110 | } | 108 | } |
| 111 | if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && | 109 | if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && |
| 112 | (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { | 110 | (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { |
| 113 | printk(KERN_WARNING "ECN: cannot use TCP operations on a " | 111 | pr_info("cannot use TCP operations on a non-tcp rule\n"); |
| 114 | "non-tcp rule\n"); | 112 | return -EINVAL; |
| 115 | return false; | ||
| 116 | } | 113 | } |
| 117 | return true; | 114 | return 0; |
| 118 | } | 115 | } |
| 119 | 116 | ||
| 120 | static struct xt_target ecn_tg_reg __read_mostly = { | 117 | static struct xt_target ecn_tg_reg __read_mostly = { |
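The ipt_ECN conversion shows the two mechanical API changes that repeat through the remaining files: packet-time hooks take struct xt_action_param instead of xt_target_param, and checkentry functions return 0 or a negative errno instead of bool, which lets distinct error codes propagate to userspace (here they all stay -EINVAL). The pr_fmt definition at the top of the file is what allows the hand-written "ECN:" prefixes to be dropped from the messages. A sketch of the new checkentry shape, with an invented demo_info payload:

    /*
     * Shape of a checkentry function under the new convention; struct
     * demo_info and DEMO_VALID_FLAGS are made up for the example.
     */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
    #include <linux/kernel.h>
    #include <linux/netfilter/x_tables.h>

    struct demo_info {
            __u32 flags;
    };
    #define DEMO_VALID_FLAGS 0x03

    static int demo_tg_check(const struct xt_tgchk_param *par)
    {
            const struct demo_info *info = par->targinfo;

            if (info->flags & ~DEMO_VALID_FLAGS) {
                    pr_info("unknown flags %x\n", info->flags);
                    return -EINVAL;         /* old API: return false */
            }
            return 0;                       /* old API: return true */
    }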
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index ee128efa1c8d..915fc17d7ce2 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
| @@ -9,10 +9,11 @@ | |||
| 9 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
| 10 | * published by the Free Software Foundation. | 10 | * published by the Free Software Foundation. |
| 11 | */ | 11 | */ |
| 12 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
| 15 | #include <linux/skbuff.h> | 15 | #include <linux/skbuff.h> |
| 16 | #include <linux/if_arp.h> | ||
| 16 | #include <linux/ip.h> | 17 | #include <linux/ip.h> |
| 17 | #include <net/icmp.h> | 18 | #include <net/icmp.h> |
| 18 | #include <net/udp.h> | 19 | #include <net/udp.h> |
| @@ -363,11 +364,47 @@ static void dump_packet(const struct nf_loginfo *info, | |||
| 363 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ | 364 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ |
| 364 | } | 365 | } |
| 365 | 366 | ||
| 367 | static void dump_mac_header(const struct nf_loginfo *info, | ||
| 368 | const struct sk_buff *skb) | ||
| 369 | { | ||
| 370 | struct net_device *dev = skb->dev; | ||
| 371 | unsigned int logflags = 0; | ||
| 372 | |||
| 373 | if (info->type == NF_LOG_TYPE_LOG) | ||
| 374 | logflags = info->u.log.logflags; | ||
| 375 | |||
| 376 | if (!(logflags & IPT_LOG_MACDECODE)) | ||
| 377 | goto fallback; | ||
| 378 | |||
| 379 | switch (dev->type) { | ||
| 380 | case ARPHRD_ETHER: | ||
| 381 | printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", | ||
| 382 | eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, | ||
| 383 | ntohs(eth_hdr(skb)->h_proto)); | ||
| 384 | return; | ||
| 385 | default: | ||
| 386 | break; | ||
| 387 | } | ||
| 388 | |||
| 389 | fallback: | ||
| 390 | printk("MAC="); | ||
| 391 | if (dev->hard_header_len && | ||
| 392 | skb->mac_header != skb->network_header) { | ||
| 393 | const unsigned char *p = skb_mac_header(skb); | ||
| 394 | unsigned int i; | ||
| 395 | |||
| 396 | printk("%02x", *p++); | ||
| 397 | for (i = 1; i < dev->hard_header_len; i++, p++) | ||
| 398 | printk(":%02x", *p); | ||
| 399 | } | ||
| 400 | printk(" "); | ||
| 401 | } | ||
| 402 | |||
| 366 | static struct nf_loginfo default_loginfo = { | 403 | static struct nf_loginfo default_loginfo = { |
| 367 | .type = NF_LOG_TYPE_LOG, | 404 | .type = NF_LOG_TYPE_LOG, |
| 368 | .u = { | 405 | .u = { |
| 369 | .log = { | 406 | .log = { |
| 370 | .level = 0, | 407 | .level = 5, |
| 371 | .logflags = NF_LOG_MASK, | 408 | .logflags = NF_LOG_MASK, |
| 372 | }, | 409 | }, |
| 373 | }, | 410 | }, |
| @@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf, | |||
| 404 | } | 441 | } |
| 405 | #endif | 442 | #endif |
| 406 | 443 | ||
| 407 | if (in && !out) { | 444 | /* MAC logging for input path only. */ |
| 408 | /* MAC logging for input chain only. */ | 445 | if (in && !out) |
| 409 | printk("MAC="); | 446 | dump_mac_header(loginfo, skb); |
| 410 | if (skb->dev && skb->dev->hard_header_len && | ||
| 411 | skb->mac_header != skb->network_header) { | ||
| 412 | int i; | ||
| 413 | const unsigned char *p = skb_mac_header(skb); | ||
| 414 | for (i = 0; i < skb->dev->hard_header_len; i++,p++) | ||
| 415 | printk("%02x%c", *p, | ||
| 416 | i==skb->dev->hard_header_len - 1 | ||
| 417 | ? ' ':':'); | ||
| 418 | } else | ||
| 419 | printk(" "); | ||
| 420 | } | ||
| 421 | 447 | ||
| 422 | dump_packet(loginfo, skb, 0); | 448 | dump_packet(loginfo, skb, 0); |
| 423 | printk("\n"); | 449 | printk("\n"); |
| @@ -425,7 +451,7 @@ ipt_log_packet(u_int8_t pf, | |||
| 425 | } | 451 | } |
| 426 | 452 | ||
| 427 | static unsigned int | 453 | static unsigned int |
| 428 | log_tg(struct sk_buff *skb, const struct xt_target_param *par) | 454 | log_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 429 | { | 455 | { |
| 430 | const struct ipt_log_info *loginfo = par->targinfo; | 456 | const struct ipt_log_info *loginfo = par->targinfo; |
| 431 | struct nf_loginfo li; | 457 | struct nf_loginfo li; |
| @@ -439,20 +465,19 @@ log_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 439 | return XT_CONTINUE; | 465 | return XT_CONTINUE; |
| 440 | } | 466 | } |
| 441 | 467 | ||
| 442 | static bool log_tg_check(const struct xt_tgchk_param *par) | 468 | static int log_tg_check(const struct xt_tgchk_param *par) |
| 443 | { | 469 | { |
| 444 | const struct ipt_log_info *loginfo = par->targinfo; | 470 | const struct ipt_log_info *loginfo = par->targinfo; |
| 445 | 471 | ||
| 446 | if (loginfo->level >= 8) { | 472 | if (loginfo->level >= 8) { |
| 447 | pr_debug("LOG: level %u >= 8\n", loginfo->level); | 473 | pr_debug("level %u >= 8\n", loginfo->level); |
| 448 | return false; | 474 | return -EINVAL; |
| 449 | } | 475 | } |
| 450 | if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { | 476 | if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { |
| 451 | pr_debug("LOG: prefix term %i\n", | 477 | pr_debug("prefix is not null-terminated\n"); |
| 452 | loginfo->prefix[sizeof(loginfo->prefix)-1]); | 478 | return -EINVAL; |
| 453 | return false; | ||
| 454 | } | 479 | } |
| 455 | return true; | 480 | return 0; |
| 456 | } | 481 | } |
| 457 | 482 | ||
| 458 | static struct xt_target log_tg_reg __read_mostly = { | 483 | static struct xt_target log_tg_reg __read_mostly = { |
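In ipt_LOG, MAC logging moves into its own dump_mac_header(): with the new IPT_LOG_MACDECODE flag it prints decoded MACSRC=/MACDST=/MACPROTO= fields for Ethernet devices and falls back to the raw hex dump otherwise. The rewritten fallback loop produces the same output as the old per-byte "%02x%c" construction, minus the ternary on every iteration; note also that the default log level moves from 0 (KERN_EMERG) to 5 (KERN_NOTICE). A userspace rendering of the fallback loop makes the equivalence easy to check (the header bytes are illustrative):

    /*
     * Userspace rendering of the fallback dump loop above; the
     * trailing space after the dump is kept, as in the kernel code.
     */
    #include <stdio.h>

    static void dump_mac(const unsigned char *p, unsigned int hard_header_len)
    {
            unsigned int i;

            printf("MAC=");
            if (hard_header_len) {
                    printf("%02x", p[0]);
                    for (i = 1; i < hard_header_len; i++)
                            printf(":%02x", p[i]);
            }
            printf(" ");
    }

    int main(void)
    {
            const unsigned char mac[14] = { /* illustrative Ethernet header */
                    0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb,
                    0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x08, 0x00 };

            dump_mac(mac, sizeof(mac));
            putchar('\n');
            return 0;
    }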
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 650b54042b01..d2ed9dc74ebc 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
| 9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
| 10 | */ | 10 | */ |
| 11 | 11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
| 13 | #include <linux/inetdevice.h> | 13 | #include <linux/inetdevice.h> |
| 14 | #include <linux/ip.h> | 14 | #include <linux/ip.h> |
| @@ -28,23 +28,23 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | |||
| 28 | MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); | 28 | MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); |
| 29 | 29 | ||
| 30 | /* FIXME: Multiple targets. --RR */ | 30 | /* FIXME: Multiple targets. --RR */ |
| 31 | static bool masquerade_tg_check(const struct xt_tgchk_param *par) | 31 | static int masquerade_tg_check(const struct xt_tgchk_param *par) |
| 32 | { | 32 | { |
| 33 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 33 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
| 34 | 34 | ||
| 35 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 35 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { |
| 36 | pr_debug("masquerade_check: bad MAP_IPS.\n"); | 36 | pr_debug("bad MAP_IPS.\n"); |
| 37 | return false; | 37 | return -EINVAL; |
| 38 | } | 38 | } |
| 39 | if (mr->rangesize != 1) { | 39 | if (mr->rangesize != 1) { |
| 40 | pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize); | 40 | pr_debug("bad rangesize %u\n", mr->rangesize); |
| 41 | return false; | 41 | return -EINVAL; |
| 42 | } | 42 | } |
| 43 | return true; | 43 | return 0; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | static unsigned int | 46 | static unsigned int |
| 47 | masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) | 47 | masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 48 | { | 48 | { |
| 49 | struct nf_conn *ct; | 49 | struct nf_conn *ct; |
| 50 | struct nf_conn_nat *nat; | 50 | struct nf_conn_nat *nat; |
| @@ -72,7 +72,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 72 | rt = skb_rtable(skb); | 72 | rt = skb_rtable(skb); |
| 73 | newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); | 73 | newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); |
| 74 | if (!newsrc) { | 74 | if (!newsrc) { |
| 75 | printk("MASQUERADE: %s ate my IP address\n", par->out->name); | 75 | pr_info("%s ate my IP address\n", par->out->name); |
| 76 | return NF_DROP; | 76 | return NF_DROP; |
| 77 | } | 77 | } |
| 78 | 78 | ||
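The one-line pr_fmt definition added at the top of these files is why messages such as the MASQUERADE complaint above keep their module prefix even though the prefix is gone from the format string: the pr_* macros paste pr_fmt() around the format at compile time. A userspace stand-in shows the expansion; in the kernel, pr_info(fmt, ...) expands to printk(KERN_INFO pr_fmt(fmt), ...):

    /*
     * Compile-time expansion of pr_fmt, demonstrated with a printf
     * stand-in for printk.
     */
    #include <stdio.h>

    #define KBUILD_MODNAME "ipt_MASQUERADE"
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
    #define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

    int main(void)
    {
            pr_info("%s ate my IP address\n", "eth0");
            /* prints: ipt_MASQUERADE: eth0 ate my IP address */
            return 0;
    }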
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 7c29582d4ec8..6cdb298f1035 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
| 10 | * published by the Free Software Foundation. | 10 | * published by the Free Software Foundation. |
| 11 | */ | 11 | */ |
| 12 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 13 | #include <linux/ip.h> | 13 | #include <linux/ip.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/netdevice.h> | 15 | #include <linux/netdevice.h> |
| @@ -22,23 +22,23 @@ MODULE_LICENSE("GPL"); | |||
| 22 | MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); | 22 | MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); |
| 23 | MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); | 23 | MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); |
| 24 | 24 | ||
| 25 | static bool netmap_tg_check(const struct xt_tgchk_param *par) | 25 | static int netmap_tg_check(const struct xt_tgchk_param *par) |
| 26 | { | 26 | { |
| 27 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 27 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
| 28 | 28 | ||
| 29 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { | 29 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { |
| 30 | pr_debug("NETMAP:check: bad MAP_IPS.\n"); | 30 | pr_debug("bad MAP_IPS.\n"); |
| 31 | return false; | 31 | return -EINVAL; |
| 32 | } | 32 | } |
| 33 | if (mr->rangesize != 1) { | 33 | if (mr->rangesize != 1) { |
| 34 | pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize); | 34 | pr_debug("bad rangesize %u.\n", mr->rangesize); |
| 35 | return false; | 35 | return -EINVAL; |
| 36 | } | 36 | } |
| 37 | return true; | 37 | return 0; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | static unsigned int | 40 | static unsigned int |
| 41 | netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) | 41 | netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 42 | { | 42 | { |
| 43 | struct nf_conn *ct; | 43 | struct nf_conn *ct; |
| 44 | enum ip_conntrack_info ctinfo; | 44 | enum ip_conntrack_info ctinfo; |
| @@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 48 | 48 | ||
| 49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
| 50 | par->hooknum == NF_INET_POST_ROUTING || | 50 | par->hooknum == NF_INET_POST_ROUTING || |
| 51 | par->hooknum == NF_INET_LOCAL_OUT); | 51 | par->hooknum == NF_INET_LOCAL_OUT || |
| 52 | par->hooknum == NF_INET_LOCAL_IN); | ||
| 52 | ct = nf_ct_get(skb, &ctinfo); | 53 | ct = nf_ct_get(skb, &ctinfo); |
| 53 | 54 | ||
| 54 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); | 55 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); |
| @@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = { | |||
| 77 | .table = "nat", | 78 | .table = "nat", |
| 78 | .hooks = (1 << NF_INET_PRE_ROUTING) | | 79 | .hooks = (1 << NF_INET_PRE_ROUTING) | |
| 79 | (1 << NF_INET_POST_ROUTING) | | 80 | (1 << NF_INET_POST_ROUTING) | |
| 80 | (1 << NF_INET_LOCAL_OUT), | 81 | (1 << NF_INET_LOCAL_OUT) | |
| 82 | (1 << NF_INET_LOCAL_IN), | ||
| 81 | .checkentry = netmap_tg_check, | 83 | .checkentry = netmap_tg_check, |
| 82 | .me = THIS_MODULE | 84 | .me = THIS_MODULE |
| 83 | }; | 85 | }; |
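Besides the same API conversion, NETMAP now also registers for NF_INET_LOCAL_IN, extending the hooks its 1:1 mapping can attach to. The mapping itself hinges on the netmask line visible in the hunk: XORing the range bounds yields the host bits, and, per the body of netmap_tg() just past this hunk, the original address keeps its host bits while the network bits are taken from the range. A userspace sketch of that arithmetic with illustrative addresses:

    /*
     * NETMAP's address arithmetic in userspace; range values are
     * illustrative, and addresses stay in network byte order as in
     * the kernel.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    static uint32_t netmap(uint32_t addr, uint32_t min_ip, uint32_t max_ip)
    {
            uint32_t netmask = ~(min_ip ^ max_ip);  /* 1s on the network bits */

            /* keep host bits, take network bits from the target range */
            return (addr & ~netmask) | (min_ip & netmask);
    }

    int main(void)
    {
            struct in_addr out;

            out.s_addr = netmap(inet_addr("192.168.1.42"),
                                inet_addr("10.0.0.0"),
                                inet_addr("10.0.0.255"));
            printf("%s\n", inet_ntoa(out));         /* 10.0.0.42 */
            return 0;
    }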
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 698e5e78685b..18a0656505a0 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
| 7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
| 8 | */ | 8 | */ |
| 9 | 9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
| 11 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
| 12 | #include <linux/timer.h> | 12 | #include <linux/timer.h> |
| @@ -26,23 +26,23 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | |||
| 26 | MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); | 26 | MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); |
| 27 | 27 | ||
| 28 | /* FIXME: Take multiple ranges --RR */ | 28 | /* FIXME: Take multiple ranges --RR */ |
| 29 | static bool redirect_tg_check(const struct xt_tgchk_param *par) | 29 | static int redirect_tg_check(const struct xt_tgchk_param *par) |
| 30 | { | 30 | { |
| 31 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 31 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
| 32 | 32 | ||
| 33 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 33 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { |
| 34 | pr_debug("redirect_check: bad MAP_IPS.\n"); | 34 | pr_debug("bad MAP_IPS.\n"); |
| 35 | return false; | 35 | return -EINVAL; |
| 36 | } | 36 | } |
| 37 | if (mr->rangesize != 1) { | 37 | if (mr->rangesize != 1) { |
| 38 | pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize); | 38 | pr_debug("bad rangesize %u.\n", mr->rangesize); |
| 39 | return false; | 39 | return -EINVAL; |
| 40 | } | 40 | } |
| 41 | return true; | 41 | return 0; |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | static unsigned int | 44 | static unsigned int |
| 45 | redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) | 45 | redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 46 | { | 46 | { |
| 47 | struct nf_conn *ct; | 47 | struct nf_conn *ct; |
| 48 | enum ip_conntrack_info ctinfo; | 48 | enum ip_conntrack_info ctinfo; |
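REDIRECT's checkentry performs the same two validations as MASQUERADE's: no MAP_IPS flag, since these targets choose the address themselves, and exactly one range, per the in-tree FIXMEs. The patch converts each copy separately; a shared helper, purely hypothetical and not part of this patch, could fold the duplicates together:

    /*
     * Hypothetical consolidation of the identical checks in
     * MASQUERADE and REDIRECT above; not part of this patch.
     */
    #include <linux/errno.h>
    #include <net/netfilter/nf_nat.h>

    static int demo_check_single_range(const struct nf_nat_multi_range_compat *mr)
    {
            if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
                    return -EINVAL; /* these targets pick the address */
            if (mr->rangesize != 1)
                    return -EINVAL; /* per the in-tree FIXMEs */
            return 0;
    }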
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 5113b8f1a379..43eec80c0e7c 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
| @@ -9,9 +9,10 @@ | |||
| 9 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
| 10 | * published by the Free Software Foundation. | 10 | * published by the Free Software Foundation. |
| 11 | */ | 11 | */ |
| 12 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
| 15 | #include <linux/slab.h> | ||
| 15 | #include <linux/ip.h> | 16 | #include <linux/ip.h> |
| 16 | #include <linux/udp.h> | 17 | #include <linux/udp.h> |
| 17 | #include <linux/icmp.h> | 18 | #include <linux/icmp.h> |
| @@ -94,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | tcph->rst = 1; | 97 | tcph->rst = 1; |
| 97 | tcph->check = tcp_v4_check(sizeof(struct tcphdr), | 98 | tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, |
| 98 | niph->saddr, niph->daddr, | 99 | niph->daddr, 0); |
| 99 | csum_partial(tcph, | 100 | nskb->ip_summed = CHECKSUM_PARTIAL; |
| 100 | sizeof(struct tcphdr), 0)); | 101 | nskb->csum_start = (unsigned char *)tcph - nskb->head; |
| 102 | nskb->csum_offset = offsetof(struct tcphdr, check); | ||
| 101 | 103 | ||
| 102 | addr_type = RTN_UNSPEC; | 104 | addr_type = RTN_UNSPEC; |
| 103 | if (hook != NF_INET_FORWARD | 105 | if (hook != NF_INET_FORWARD |
| @@ -108,13 +110,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
| 108 | addr_type = RTN_LOCAL; | 110 | addr_type = RTN_LOCAL; |
| 109 | 111 | ||
| 110 | /* ip_route_me_harder expects skb->dst to be set */ | 112 | /* ip_route_me_harder expects skb->dst to be set */ |
| 111 | skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); | 113 | skb_dst_set_noref(nskb, skb_dst(oldskb)); |
| 112 | 114 | ||
| 115 | nskb->protocol = htons(ETH_P_IP); | ||
| 113 | if (ip_route_me_harder(nskb, addr_type)) | 116 | if (ip_route_me_harder(nskb, addr_type)) |
| 114 | goto free_nskb; | 117 | goto free_nskb; |
| 115 | 118 | ||
| 116 | niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); | 119 | niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); |
| 117 | nskb->ip_summed = CHECKSUM_NONE; | ||
| 118 | 120 | ||
| 119 | /* "Never happens" */ | 121 | /* "Never happens" */ |
| 120 | if (nskb->len > dst_mtu(skb_dst(nskb))) | 122 | if (nskb->len > dst_mtu(skb_dst(nskb))) |
| @@ -135,13 +137,10 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) | |||
| 135 | } | 137 | } |
| 136 | 138 | ||
| 137 | static unsigned int | 139 | static unsigned int |
| 138 | reject_tg(struct sk_buff *skb, const struct xt_target_param *par) | 140 | reject_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 139 | { | 141 | { |
| 140 | const struct ipt_reject_info *reject = par->targinfo; | 142 | const struct ipt_reject_info *reject = par->targinfo; |
| 141 | 143 | ||
| 142 | /* WARNING: This code causes reentry within iptables. | ||
| 143 | This means that the iptables jump stack is now crap. We | ||
| 144 | must return an absolute verdict. --RR */ | ||
| 145 | switch (reject->with) { | 144 | switch (reject->with) { |
| 146 | case IPT_ICMP_NET_UNREACHABLE: | 145 | case IPT_ICMP_NET_UNREACHABLE: |
| 147 | send_unreach(skb, ICMP_NET_UNREACH); | 146 | send_unreach(skb, ICMP_NET_UNREACH); |
| @@ -174,23 +173,23 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 174 | return NF_DROP; | 173 | return NF_DROP; |
| 175 | } | 174 | } |
| 176 | 175 | ||
| 177 | static bool reject_tg_check(const struct xt_tgchk_param *par) | 176 | static int reject_tg_check(const struct xt_tgchk_param *par) |
| 178 | { | 177 | { |
| 179 | const struct ipt_reject_info *rejinfo = par->targinfo; | 178 | const struct ipt_reject_info *rejinfo = par->targinfo; |
| 180 | const struct ipt_entry *e = par->entryinfo; | 179 | const struct ipt_entry *e = par->entryinfo; |
| 181 | 180 | ||
| 182 | if (rejinfo->with == IPT_ICMP_ECHOREPLY) { | 181 | if (rejinfo->with == IPT_ICMP_ECHOREPLY) { |
| 183 | printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); | 182 | pr_info("ECHOREPLY no longer supported.\n"); |
| 184 | return false; | 183 | return -EINVAL; |
| 185 | } else if (rejinfo->with == IPT_TCP_RESET) { | 184 | } else if (rejinfo->with == IPT_TCP_RESET) { |
| 186 | /* Must specify that it's a TCP packet */ | 185 | /* Must specify that it's a TCP packet */ |
| 187 | if (e->ip.proto != IPPROTO_TCP || | 186 | if (e->ip.proto != IPPROTO_TCP || |
| 188 | (e->ip.invflags & XT_INV_PROTO)) { | 187 | (e->ip.invflags & XT_INV_PROTO)) { |
| 189 | printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); | 188 | pr_info("TCP_RESET invalid for non-tcp\n"); |
| 190 | return false; | 189 | return -EINVAL; |
| 191 | } | 190 | } |
| 192 | } | 191 | } |
| 193 | return true; | 192 | return 0; |
| 194 | } | 193 | } |
| 195 | 194 | ||
| 196 | static struct xt_target reject_tg_reg __read_mostly = { | 195 | static struct xt_target reject_tg_reg __read_mostly = { |
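The substantive ipt_REJECT hunk is the RST checksum: instead of computing the full TCP checksum with csum_partial() and marking the skb CHECKSUM_NONE, the reply now carries only the complemented pseudo-header sum and is flagged CHECKSUM_PARTIAL, so the final fold happens in the NIC when checksum offload is available, or in skb_checksum_help() when it is not. Similarly, skb_dst_set_noref() borrows the original packet's dst without refcount churn, and nskb->protocol is set up front for ip_route_me_harder(). The checksum pattern as a stand-alone helper:

    /*
     * The CHECKSUM_PARTIAL pattern from the hunk above as a helper;
     * demo_* naming is illustrative, and iph/tcph are assumed to
     * point into skb already.
     */
    #include <linux/ip.h>
    #include <linux/skbuff.h>
    #include <linux/tcp.h>
    #include <net/tcp.h>

    static void demo_seed_tcp_csum(struct sk_buff *skb,
                                   const struct iphdr *iph,
                                   struct tcphdr *tcph)
    {
            /* complement of the pseudo-header sum; the stack (or NIC)
             * later adds the header+payload sum and folds the result
             * into tcph->check */
            tcph->check = ~tcp_v4_check(sizeof(*tcph), iph->saddr,
                                        iph->daddr, 0);
            skb->ip_summed = CHECKSUM_PARTIAL;
            skb->csum_start = (unsigned char *)tcph - skb->head;
            skb->csum_offset = offsetof(struct tcphdr, check);
    }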
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 399061c3fd7d..446e0f467a17 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
| @@ -29,10 +29,11 @@ | |||
| 29 | * Specify, after how many hundredths of a second the queue should be | 29 | * Specify, after how many hundredths of a second the queue should be |
| 30 | * flushed even if it is not full yet. | 30 | * flushed even if it is not full yet. |
| 31 | */ | 31 | */ |
| 32 | 32 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
| 34 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
| 35 | #include <linux/socket.h> | 35 | #include <linux/socket.h> |
| 36 | #include <linux/slab.h> | ||
| 36 | #include <linux/skbuff.h> | 37 | #include <linux/skbuff.h> |
| 37 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
| 38 | #include <linux/timer.h> | 39 | #include <linux/timer.h> |
| @@ -56,8 +57,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); | |||
| 56 | #define ULOG_NL_EVENT 111 /* Harald's favorite number */ | 57 | #define ULOG_NL_EVENT 111 /* Harald's favorite number */ |
| 57 | #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ | 58 | #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ |
| 58 | 59 | ||
| 59 | #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) | ||
| 60 | |||
| 61 | static unsigned int nlbufsiz = NLMSG_GOODSIZE; | 60 | static unsigned int nlbufsiz = NLMSG_GOODSIZE; |
| 62 | module_param(nlbufsiz, uint, 0400); | 61 | module_param(nlbufsiz, uint, 0400); |
| 63 | MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); | 62 | MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); |
| @@ -90,12 +89,12 @@ static void ulog_send(unsigned int nlgroupnum) | |||
| 90 | ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; | 89 | ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; |
| 91 | 90 | ||
| 92 | if (timer_pending(&ub->timer)) { | 91 | if (timer_pending(&ub->timer)) { |
| 93 | pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n"); | 92 | pr_debug("ulog_send: timer was pending, deleting\n"); |
| 94 | del_timer(&ub->timer); | 93 | del_timer(&ub->timer); |
| 95 | } | 94 | } |
| 96 | 95 | ||
| 97 | if (!ub->skb) { | 96 | if (!ub->skb) { |
| 98 | pr_debug("ipt_ULOG: ulog_send: nothing to send\n"); | 97 | pr_debug("ulog_send: nothing to send\n"); |
| 99 | return; | 98 | return; |
| 100 | } | 99 | } |
| 101 | 100 | ||
| @@ -104,7 +103,7 @@ static void ulog_send(unsigned int nlgroupnum) | |||
| 104 | ub->lastnlh->nlmsg_type = NLMSG_DONE; | 103 | ub->lastnlh->nlmsg_type = NLMSG_DONE; |
| 105 | 104 | ||
| 106 | NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; | 105 | NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; |
| 107 | pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n", | 106 | pr_debug("throwing %d packets to netlink group %u\n", |
| 108 | ub->qlen, nlgroupnum + 1); | 107 | ub->qlen, nlgroupnum + 1); |
| 109 | netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); | 108 | netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); |
| 110 | 109 | ||
| @@ -117,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum) | |||
| 117 | /* timer function to flush queue in flushtimeout time */ | 116 | /* timer function to flush queue in flushtimeout time */ |
| 118 | static void ulog_timer(unsigned long data) | 117 | static void ulog_timer(unsigned long data) |
| 119 | { | 118 | { |
| 120 | pr_debug("ipt_ULOG: timer function called, calling ulog_send\n"); | 119 | pr_debug("timer function called, calling ulog_send\n"); |
| 121 | 120 | ||
| 122 | /* lock to protect against somebody modifying our structure | 121 | /* lock to protect against somebody modifying our structure |
| 123 | * from ipt_ulog_target at the same time */ | 122 | * from ipt_ulog_target at the same time */ |
| @@ -138,7 +137,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) | |||
| 138 | n = max(size, nlbufsiz); | 137 | n = max(size, nlbufsiz); |
| 139 | skb = alloc_skb(n, GFP_ATOMIC); | 138 | skb = alloc_skb(n, GFP_ATOMIC); |
| 140 | if (!skb) { | 139 | if (!skb) { |
| 141 | PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", n); | 140 | pr_debug("cannot alloc whole buffer %ub!\n", n); |
| 142 | 141 | ||
| 143 | if (n > size) { | 142 | if (n > size) { |
| 144 | /* try to allocate only as much as we need for | 143 | /* try to allocate only as much as we need for |
| @@ -146,8 +145,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) | |||
| 146 | 145 | ||
| 147 | skb = alloc_skb(size, GFP_ATOMIC); | 146 | skb = alloc_skb(size, GFP_ATOMIC); |
| 148 | if (!skb) | 147 | if (!skb) |
| 149 | PRINTR("ipt_ULOG: can't even allocate %ub\n", | 148 | pr_debug("cannot even allocate %ub\n", size); |
| 150 | size); | ||
| 151 | } | 149 | } |
| 152 | } | 150 | } |
| 153 | 151 | ||
| @@ -198,8 +196,7 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
| 198 | goto alloc_failure; | 196 | goto alloc_failure; |
| 199 | } | 197 | } |
| 200 | 198 | ||
| 201 | pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen, | 199 | pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); |
| 202 | loginfo->qthreshold); | ||
| 203 | 200 | ||
| 204 | /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ | 201 | /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ |
| 205 | nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, | 202 | nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, |
| @@ -272,16 +269,14 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
| 272 | return; | 269 | return; |
| 273 | 270 | ||
| 274 | nlmsg_failure: | 271 | nlmsg_failure: |
| 275 | PRINTR("ipt_ULOG: error during NLMSG_PUT\n"); | 272 | pr_debug("error during NLMSG_PUT\n"); |
| 276 | |||
| 277 | alloc_failure: | 273 | alloc_failure: |
| 278 | PRINTR("ipt_ULOG: Error building netlink message\n"); | 274 | pr_debug("Error building netlink message\n"); |
| 279 | |||
| 280 | spin_unlock_bh(&ulog_lock); | 275 | spin_unlock_bh(&ulog_lock); |
| 281 | } | 276 | } |
| 282 | 277 | ||
| 283 | static unsigned int | 278 | static unsigned int |
| 284 | ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) | 279 | ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| 285 | { | 280 | { |
| 286 | ipt_ulog_packet(par->hooknum, skb, par->in, par->out, | 281 | ipt_ulog_packet(par->hooknum, skb, par->in, par->out, |
| 287 | par->targinfo, NULL); | 282 | par->targinfo, NULL); |
| @@ -313,21 +308,20 @@ static void ipt_logfn(u_int8_t pf, | |||
| 313 | ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); | 308 | ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); |
| 314 | } | 309 | } |
| 315 | 310 | ||
| 316 | static bool ulog_tg_check(const struct xt_tgchk_param *par) | 311 | static int ulog_tg_check(const struct xt_tgchk_param *par) |
| 317 | { | 312 | { |
| 318 | const struct ipt_ulog_info *loginfo = par->targinfo; | 313 | const struct ipt_ulog_info *loginfo = par->targinfo; |
| 319 | 314 | ||
| 320 | if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { | 315 | if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { |
| 321 | pr_debug("ipt_ULOG: prefix term %i\n", | 316 | pr_debug("prefix not null-terminated\n"); |
| 322 | loginfo->prefix[sizeof(loginfo->prefix) - 1]); | 317 | return -EINVAL; |
| 323 | return false; | ||
| 324 | } | 318 | } |
| 325 | if (loginfo->qthreshold > ULOG_MAX_QLEN) { | 319 | if (loginfo->qthreshold > ULOG_MAX_QLEN) { |
| 326 | pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n", | 320 | pr_debug("queue threshold %Zu > MAX_QLEN\n", |
| 327 | loginfo->qthreshold); | 321 | loginfo->qthreshold); |
| 328 | return false; | 322 | return -EINVAL; |
| 329 | } | 323 | } |
| 330 | return true; | 324 | return 0; |
| 331 | } | 325 | } |
| 332 | 326 | ||
| 333 | #ifdef CONFIG_COMPAT | 327 | #ifdef CONFIG_COMPAT |
| @@ -338,7 +332,7 @@ struct compat_ipt_ulog_info { | |||
| 338 | char prefix[ULOG_PREFIX_LEN]; | 332 | char prefix[ULOG_PREFIX_LEN]; |
| 339 | }; | 333 | }; |
| 340 | 334 | ||
| 341 | static void ulog_tg_compat_from_user(void *dst, void *src) | 335 | static void ulog_tg_compat_from_user(void *dst, const void *src) |
| 342 | { | 336 | { |
| 343 | const struct compat_ipt_ulog_info *cl = src; | 337 | const struct compat_ipt_ulog_info *cl = src; |
| 344 | struct ipt_ulog_info l = { | 338 | struct ipt_ulog_info l = { |
| @@ -351,7 +345,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src) | |||
| 351 | memcpy(dst, &l, sizeof(l)); | 345 | memcpy(dst, &l, sizeof(l)); |
| 352 | } | 346 | } |
| 353 | 347 | ||
| 354 | static int ulog_tg_compat_to_user(void __user *dst, void *src) | 348 | static int ulog_tg_compat_to_user(void __user *dst, const void *src) |
| 355 | { | 349 | { |
| 356 | const struct ipt_ulog_info *l = src; | 350 | const struct ipt_ulog_info *l = src; |
| 357 | struct compat_ipt_ulog_info cl = { | 351 | struct compat_ipt_ulog_info cl = { |
| @@ -389,10 +383,10 @@ static int __init ulog_tg_init(void) | |||
| 389 | { | 383 | { |
| 390 | int ret, i; | 384 | int ret, i; |
| 391 | 385 | ||
| 392 | pr_debug("ipt_ULOG: init module\n"); | 386 | pr_debug("init module\n"); |
| 393 | 387 | ||
| 394 | if (nlbufsiz > 128*1024) { | 388 | if (nlbufsiz > 128*1024) { |
| 395 | printk("Netlink buffer has to be <= 128kB\n"); | 389 | pr_warning("Netlink buffer has to be <= 128kB\n"); |
| 396 | return -EINVAL; | 390 | return -EINVAL; |
| 397 | } | 391 | } |
| 398 | 392 | ||
| @@ -422,7 +416,7 @@ static void __exit ulog_tg_exit(void) | |||
| 422 | ulog_buff_t *ub; | 416 | ulog_buff_t *ub; |
| 423 | int i; | 417 | int i; |
| 424 | 418 | ||
| 425 | pr_debug("ipt_ULOG: cleanup_module\n"); | 419 | pr_debug("cleanup_module\n"); |
| 426 | 420 | ||
| 427 | if (nflog) | 421 | if (nflog) |
| 428 | nf_log_unregister(&ipt_ulog_logger); | 422 | nf_log_unregister(&ipt_ulog_logger); |
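Two notes on the ipt_ULOG conversion. The removed PRINTR() was an unconditional (though ratelimited) printk, while its replacements are pr_debug(), so those diagnostics are now compiled out unless DEBUG or dynamic debug is in effect. The surrounding hunks also outline the module's buffering model, matching the header comment: packets accumulate per netlink group and are flushed when the queue reaches its threshold or when a timer fires after flushtimeout hundredths of a second. A simplified sketch of that scheme (demo_* names are placeholders and the netlink send is elided):

    #include <linux/jiffies.h>
    #include <linux/timer.h>

    /*
     * Simplified buffer-and-flush scheme; flushtimeout is in
     * hundredths of a second, as the header comment says.
     */
    struct demo_buf {
            struct timer_list timer;
            unsigned int qlen;
    };

    static unsigned int qthreshold = 10;    /* illustrative default */
    static unsigned int flushtimeout = 100; /* one second */

    static void demo_send(struct demo_buf *ub)
    {
            if (timer_pending(&ub->timer))
                    del_timer(&ub->timer);
            /* netlink_broadcast() of the accumulated skb goes here */
            ub->qlen = 0;
    }

    static void demo_queue_one(struct demo_buf *ub)
    {
            ub->qlen++;
            if (ub->qlen >= qthreshold)
                    demo_send(ub);          /* full: flush immediately */
            else if (ub->qlen == 1)         /* first packet: arm the timer */
                    mod_timer(&ub->timer,
                              jiffies + flushtimeout * HZ / 100);
    }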
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 3b216be3bc9f..db8bff0fb86d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
| 9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
| 10 | */ | 10 | */ |
| 11 | 11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
| @@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, | |||
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | static bool | 32 | static bool |
| 33 | addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) | 33 | addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) |
| 34 | { | 34 | { |
| 35 | struct net *net = dev_net(par->in ? par->in : par->out); | 35 | struct net *net = dev_net(par->in ? par->in : par->out); |
| 36 | const struct ipt_addrtype_info *info = par->matchinfo; | 36 | const struct ipt_addrtype_info *info = par->matchinfo; |
| @@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | static bool | 50 | static bool |
| 51 | addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) | 51 | addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) |
| 52 | { | 52 | { |
| 53 | struct net *net = dev_net(par->in ? par->in : par->out); | 53 | struct net *net = dev_net(par->in ? par->in : par->out); |
| 54 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; | 54 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; |
| @@ -70,34 +70,34 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 70 | return ret; | 70 | return ret; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) | 73 | static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) |
| 74 | { | 74 | { |
| 75 | struct ipt_addrtype_info_v1 *info = par->matchinfo; | 75 | struct ipt_addrtype_info_v1 *info = par->matchinfo; |
| 76 | 76 | ||
| 77 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && | 77 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && |
| 78 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | 78 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { |
| 79 | printk(KERN_ERR "ipt_addrtype: both incoming and outgoing " | 79 | pr_info("both incoming and outgoing " |
| 80 | "interface limitation cannot be selected\n"); | 80 | "interface limitation cannot be selected\n"); |
| 81 | return false; | 81 | return -EINVAL; |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | | 84 | if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | |
| 85 | (1 << NF_INET_LOCAL_IN)) && | 85 | (1 << NF_INET_LOCAL_IN)) && |
| 86 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | 86 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { |
| 87 | printk(KERN_ERR "ipt_addrtype: output interface limitation " | 87 | pr_info("output interface limitation " |
| 88 | "not valid in PRE_ROUTING and INPUT\n"); | 88 | "not valid in PREROUTING and INPUT\n"); |
| 89 | return false; | 89 | return -EINVAL; |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | | 92 | if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | |
| 93 | (1 << NF_INET_LOCAL_OUT)) && | 93 | (1 << NF_INET_LOCAL_OUT)) && |
| 94 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { | 94 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { |
| 95 | printk(KERN_ERR "ipt_addrtype: input interface limitation " | 95 | pr_info("input interface limitation " |
| 96 | "not valid in POST_ROUTING and OUTPUT\n"); | 96 | "not valid in POSTROUTING and OUTPUT\n"); |
| 97 | return false; | 97 | return -EINVAL; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | return true; | 100 | return 0; |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | static struct xt_match addrtype_mt_reg[] __read_mostly = { | 103 | static struct xt_match addrtype_mt_reg[] __read_mostly = { |
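The ipt_addrtype checkentry encodes a simple invariant: an output-interface limitation cannot work in hooks where no output device has been chosen yet (PREROUTING, INPUT), and an input-interface limitation cannot work once the input device is out of the picture (POSTROUTING, OUTPUT). The messages are also fixed to use the chain names users actually see instead of the hook macro spellings. At match time the surviving flag selects which device pointer to test; a sketch, reconstructed from the surrounding addrtype_mt_v1() rather than shown in the hunk:

    /*
     * How the interface limitation is consumed at match time; the
     * checkentry above guarantees the selected pointer is populated.
     */
    #include <linux/netfilter/x_tables.h>
    #include <linux/netfilter_ipv4/ipt_addrtype.h>

    static const struct net_device *
    demo_pick_dev(const struct xt_action_param *par, __u32 flags)
    {
            if (flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
                    return par->in;         /* NULL in POSTROUTING/OUTPUT */
            if (flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
                    return par->out;        /* NULL in PREROUTING/INPUT */
            return NULL;                    /* no limitation requested */
    }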
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 0104c0b399de..14a2aa8b8a14 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
| 6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
| 7 | */ | 7 | */ |
| 8 | 8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 9 | #include <linux/in.h> | 9 | #include <linux/in.h> |
| 10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
| 11 | #include <linux/skbuff.h> | 11 | #include <linux/skbuff.h> |
| @@ -18,25 +18,19 @@ MODULE_LICENSE("GPL"); | |||
| 18 | MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); | 18 | MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); |
| 19 | MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); | 19 | MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); |
| 20 | 20 | ||
| 21 | #ifdef DEBUG_CONNTRACK | ||
| 22 | #define duprintf(format, args...) printk(format , ## args) | ||
| 23 | #else | ||
| 24 | #define duprintf(format, args...) | ||
| 25 | #endif | ||
| 26 | |||
| 27 | /* Returns 1 if the spi is matched by the range, 0 otherwise */ | 21 | /* Returns 1 if the spi is matched by the range, 0 otherwise */ |
| 28 | static inline bool | 22 | static inline bool |
| 29 | spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) | 23 | spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) |
| 30 | { | 24 | { |
| 31 | bool r; | 25 | bool r; |
| 32 | duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', | 26 | pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", |
| 33 | min,spi,max); | 27 | invert ? '!' : ' ', min, spi, max); |
| 34 | r=(spi >= min && spi <= max) ^ invert; | 28 | r=(spi >= min && spi <= max) ^ invert; |
| 35 | duprintf(" result %s\n",r? "PASS" : "FAILED"); | 29 | pr_debug(" result %s\n", r ? "PASS" : "FAILED"); |
| 36 | return r; | 30 | return r; |
| 37 | } | 31 | } |
| 38 | 32 | ||
| 39 | static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) | 33 | static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) |
| 40 | { | 34 | { |
| 41 | struct ip_auth_hdr _ahdr; | 35 | struct ip_auth_hdr _ahdr; |
| 42 | const struct ip_auth_hdr *ah; | 36 | const struct ip_auth_hdr *ah; |
| @@ -51,8 +45,8 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 51 | /* We've been asked to examine this packet, and we | 45 | /* We've been asked to examine this packet, and we |
| 52 | * can't. Hence, no choice but to drop. | 46 | * can't. Hence, no choice but to drop. |
| 53 | */ | 47 | */ |
| 54 | duprintf("Dropping evil AH tinygram.\n"); | 48 | pr_debug("Dropping evil AH tinygram.\n"); |
| 55 | *par->hotdrop = true; | 49 | par->hotdrop = true; |
| 56 | return 0; | 50 | return 0; |
| 57 | } | 51 | } |
| 58 | 52 | ||
| @@ -61,16 +55,16 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 61 | !!(ahinfo->invflags & IPT_AH_INV_SPI)); | 55 | !!(ahinfo->invflags & IPT_AH_INV_SPI)); |
| 62 | } | 56 | } |
| 63 | 57 | ||
| 64 | static bool ah_mt_check(const struct xt_mtchk_param *par) | 58 | static int ah_mt_check(const struct xt_mtchk_param *par) |
| 65 | { | 59 | { |
| 66 | const struct ipt_ah *ahinfo = par->matchinfo; | 60 | const struct ipt_ah *ahinfo = par->matchinfo; |
| 67 | 61 | ||
| 68 | /* Must specify no unknown invflags */ | 62 | /* Must specify no unknown invflags */ |
| 69 | if (ahinfo->invflags & ~IPT_AH_INV_MASK) { | 63 | if (ahinfo->invflags & ~IPT_AH_INV_MASK) { |
| 70 | duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags); | 64 | pr_debug("unknown flags %X\n", ahinfo->invflags); |
| 71 | return false; | 65 | return -EINVAL; |
| 72 | } | 66 | } |
| 73 | return true; | 67 | return 0; |
| 74 | } | 68 | } |
| 75 | 69 | ||
| 76 | static struct xt_match ah_mt_reg __read_mostly = { | 70 | static struct xt_match ah_mt_reg __read_mostly = { |
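ipt_ah drops its private duprintf() macro, which was compiled in only under DEBUG_CONNTRACK, in favour of pr_debug(), and shows the second half of the xt_action_param change: hotdrop becomes a member written directly, which is why the packet-time parameter loses its const. A skeleton match under the new API (demo_mt is an illustrative name):

    /*
     * Match skeleton under the new API; note that par is no longer
     * const, which is what makes par->hotdrop writable.
     */
    #include <linux/ip.h>
    #include <linux/skbuff.h>
    #include <linux/netfilter/x_tables.h>

    static bool demo_mt(const struct sk_buff *skb, struct xt_action_param *par)
    {
            struct ip_auth_hdr _ahdr;
            const struct ip_auth_hdr *ah;

            ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
            if (ah == NULL) {
                    /* truncated header: refuse to guess, drop the packet */
                    par->hotdrop = true;    /* old API: *par->hotdrop = true */
                    return false;
            }
            return true;
    }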
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 2a1e56b71908..af6e9c778345 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
| 7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
| 8 | */ | 8 | */ |
| 9 | 9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
| 10 | #include <linux/in.h> | 10 | #include <linux/in.h> |
| 11 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
| 12 | #include <net/ip.h> | 12 | #include <net/ip.h> |
| @@ -67,7 +67,7 @@ static inline bool match_tcp(const struct sk_buff *skb, | |||
| 67 | return true; | 67 | return true; |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) | 70 | static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) |
| 71 | { | 71 | { |
| 72 | const struct ipt_ecn_info *info = par->matchinfo; | 72 | const struct ipt_ecn_info *info = par->matchinfo; |
| 73 | 73 | ||
| @@ -78,32 +78,31 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 78 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { | 78 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { |
| 79 | if (ip_hdr(skb)->protocol != IPPROTO_TCP) | 79 | if (ip_hdr(skb)->protocol != IPPROTO_TCP) |
| 80 | return false; | 80 | return false; |
| 81 | if (!match_tcp(skb, info, par->hotdrop)) | 81 | if (!match_tcp(skb, info, &par->hotdrop)) |
| 82 | return false; | 82 | return false; |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | return true; | 85 | return true; |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | static bool ecn_mt_check(const struct xt_mtchk_param *par) | 88 | static int ecn_mt_check(const struct xt_mtchk_param *par) |
| 89 | { | 89 | { |
| 90 | const struct ipt_ecn_info *info = par->matchinfo; | 90 | const struct ipt_ecn_info *info = par->matchinfo; |
| 91 | const struct ipt_ip *ip = par->entryinfo; | 91 | const struct ipt_ip *ip = par->entryinfo; |
| 92 | 92 | ||
| 93 | if (info->operation & IPT_ECN_OP_MATCH_MASK) | 93 | if (info->operation & IPT_ECN_OP_MATCH_MASK) |
| 94 | return false; | 94 | return -EINVAL; |
| 95 | 95 | ||
| 96 | if (info->invert & IPT_ECN_OP_MATCH_MASK) | 96 | if (info->invert & IPT_ECN_OP_MATCH_MASK) |
| 97 | return false; | 97 | return -EINVAL; |
| 98 | 98 | ||
| 99 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && | 99 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && |
| 100 | ip->proto != IPPROTO_TCP) { | 100 | ip->proto != IPPROTO_TCP) { |
| 101 | printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" | 101 | pr_info("cannot match TCP bits in rule for non-tcp packets\n"); |
| 102 | " non-tcp packets\n"); | 102 | return -EINVAL; |
| 103 | return false; | ||
| 104 | } | 103 | } |
| 105 | 104 | ||
| 106 | return true; | 105 | return 0; |
| 107 | } | 106 | } |
| 108 | 107 | ||
| 109 | static struct xt_match ecn_mt_reg __read_mostly = { | 108 | static struct xt_match ecn_mt_reg __read_mostly = { |
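For reference while reading the ECN match (and the ECN target earlier in this series): the IP-level test inspects the two low bits of the TOS byte, which is what IPT_ECN_IP_MASK selects, while the ECE/CWR tests need a TCP rule, the condition both checkentry functions enforce. A tiny userspace illustration of the codepoint extraction, with an illustrative TOS value:

    /*
     * The IP-level ECN field the match tests: the two low bits of the
     * TOS byte (RFC 3168: Not-ECT=00, ECT(1)=01, ECT(0)=10, CE=11).
     */
    #include <stdio.h>

    #define IPT_ECN_IP_MASK 0x03    /* two low TOS bits, as in ipt_ecn.h */

    int main(void)
    {
            unsigned char tos = 0xba;       /* DSCP EF (0xb8) plus ECT(0) */

            printf("ECN codepoint: %u\n", tos & IPT_ECN_IP_MASK);   /* 2 */
            return 0;
    }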
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd68e5..c37641e819f2 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
| 15 | #include <linux/netfilter_ipv4/ip_tables.h> | 15 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 16 | #include <linux/slab.h> | ||
| 16 | #include <net/ip.h> | 17 | #include <net/ip.h> |
| 17 | 18 | ||
| 18 | MODULE_LICENSE("GPL"); | 19 | MODULE_LICENSE("GPL"); |
| @@ -23,104 +24,32 @@ MODULE_DESCRIPTION("iptables filter table"); | |||
| 23 | (1 << NF_INET_FORWARD) | \ | 24 | (1 << NF_INET_FORWARD) | \ |
| 24 | (1 << NF_INET_LOCAL_OUT)) | 25 | (1 << NF_INET_LOCAL_OUT)) |
| 25 | 26 | ||
| 26 | static struct | ||
| 27 | { | ||
| 28 | struct ipt_replace repl; | ||
| 29 | struct ipt_standard entries[3]; | ||
| 30 | struct ipt_error term; | ||
| 31 | } initial_table __net_initdata = { | ||
| 32 | .repl = { | ||
| 33 | .name = "filter", | ||
| 34 | .valid_hooks = FILTER_VALID_HOOKS, | ||
| 35 | .num_entries = 4, | ||
| 36 | .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
| 37 | .hook_entry = { | ||
| 38 | [NF_INET_LOCAL_IN] = 0, | ||
| 39 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
| 40 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
| 41 | }, | ||
| 42 | .underflow = { | ||
| 43 | [NF_INET_LOCAL_IN] = 0, | ||
| 44 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
| 45 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
| 46 | }, | ||
| 47 | }, | ||
| 48 | .entries = { | ||
| 49 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ | ||
| 50 | IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ | ||
| 51 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
| 52 | }, | ||
| 53 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
| 54 | }; | ||
| 55 | |||
| 56 | static const struct xt_table packet_filter = { | 27 | static const struct xt_table packet_filter = { |
| 57 | .name = "filter", | 28 | .name = "filter", |
| 58 | .valid_hooks = FILTER_VALID_HOOKS, | 29 | .valid_hooks = FILTER_VALID_HOOKS, |
| 59 | .me = THIS_MODULE, | 30 | .me = THIS_MODULE, |
| 60 | .af = NFPROTO_IPV4, | 31 | .af = NFPROTO_IPV4, |
| 32 | .priority = NF_IP_PRI_FILTER, | ||
| 61 | }; | 33 | }; |
| 62 | 34 | ||
| 63 | /* The work comes in here from netfilter.c. */ | ||
| 64 | static unsigned int | ||
| 65 | ipt_local_in_hook(unsigned int hook, | ||
| 66 | struct sk_buff *skb, | ||
| 67 | const struct net_device *in, | ||
| 68 | const struct net_device *out, | ||
| 69 | int (*okfn)(struct sk_buff *)) | ||
| 70 | { | ||
| 71 | return ipt_do_table(skb, hook, in, out, | ||
| 72 | dev_net(in)->ipv4.iptable_filter); | ||
| 73 | } | ||
| 74 | |||
| 75 | static unsigned int | 35 | static unsigned int |
| 76 | ipt_hook(unsigned int hook, | 36 | iptable_filter_hook(unsigned int hook, struct sk_buff *skb, |
| 77 | struct sk_buff *skb, | 37 | const struct net_device *in, const struct net_device *out, |
| 78 | const struct net_device *in, | 38 | int (*okfn)(struct sk_buff *)) |
| 79 | const struct net_device *out, | ||
| 80 | int (*okfn)(struct sk_buff *)) | ||
| 81 | { | 39 | { |
| 82 | return ipt_do_table(skb, hook, in, out, | 40 | const struct net *net; |
| 83 | dev_net(in)->ipv4.iptable_filter); | ||
| 84 | } | ||
| 85 | 41 | ||
| 86 | static unsigned int | 42 | if (hook == NF_INET_LOCAL_OUT && |
| 87 | ipt_local_out_hook(unsigned int hook, | 43 | (skb->len < sizeof(struct iphdr) || |
| 88 | struct sk_buff *skb, | 44 | ip_hdrlen(skb) < sizeof(struct iphdr))) |
| 89 | const struct net_device *in, | 45 | /* root is playing with raw sockets. */ |
| 90 | const struct net_device *out, | ||
| 91 | int (*okfn)(struct sk_buff *)) | ||
| 92 | { | ||
| 93 | /* root is playing with raw sockets. */ | ||
| 94 | if (skb->len < sizeof(struct iphdr) || | ||
| 95 | ip_hdrlen(skb) < sizeof(struct iphdr)) | ||
| 96 | return NF_ACCEPT; | 46 | return NF_ACCEPT; |
| 97 | return ipt_do_table(skb, hook, in, out, | 47 | |
| 98 | dev_net(out)->ipv4.iptable_filter); | 48 | net = dev_net((in != NULL) ? in : out); |
| 49 | return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); | ||
| 99 | } | 50 | } |
| 100 | 51 | ||
| 101 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 52 | static struct nf_hook_ops *filter_ops __read_mostly; |
| 102 | { | ||
| 103 | .hook = ipt_local_in_hook, | ||
| 104 | .owner = THIS_MODULE, | ||
| 105 | .pf = NFPROTO_IPV4, | ||
| 106 | .hooknum = NF_INET_LOCAL_IN, | ||
| 107 | .priority = NF_IP_PRI_FILTER, | ||
| 108 | }, | ||
| 109 | { | ||
| 110 | .hook = ipt_hook, | ||
| 111 | .owner = THIS_MODULE, | ||
| 112 | .pf = NFPROTO_IPV4, | ||
| 113 | .hooknum = NF_INET_FORWARD, | ||
| 114 | .priority = NF_IP_PRI_FILTER, | ||
| 115 | }, | ||
| 116 | { | ||
| 117 | .hook = ipt_local_out_hook, | ||
| 118 | .owner = THIS_MODULE, | ||
| 119 | .pf = NFPROTO_IPV4, | ||
| 120 | .hooknum = NF_INET_LOCAL_OUT, | ||
| 121 | .priority = NF_IP_PRI_FILTER, | ||
| 122 | }, | ||
| 123 | }; | ||
| 124 | 53 | ||
| 125 | /* Default to forward because I got too much mail already. */ | 54 | /* Default to forward because I got too much mail already. */ |
| 126 | static int forward = NF_ACCEPT; | 55 | static int forward = NF_ACCEPT; |
| @@ -128,9 +57,18 @@ module_param(forward, bool, 0000); | |||
| 128 | 57 | ||
| 129 | static int __net_init iptable_filter_net_init(struct net *net) | 58 | static int __net_init iptable_filter_net_init(struct net *net) |
| 130 | { | 59 | { |
| 131 | /* Register table */ | 60 | struct ipt_replace *repl; |
| 61 | |||
| 62 | repl = ipt_alloc_initial_table(&packet_filter); | ||
| 63 | if (repl == NULL) | ||
| 64 | return -ENOMEM; | ||
| 65 | /* Entry 1 is the FORWARD hook */ | ||
| 66 | ((struct ipt_standard *)repl->entries)[1].target.verdict = | ||
| 67 | -forward - 1; | ||
| 68 | |||
| 132 | net->ipv4.iptable_filter = | 69 | net->ipv4.iptable_filter = |
| 133 | ipt_register_table(net, &packet_filter, &initial_table.repl); | 70 | ipt_register_table(net, &packet_filter, repl); |
| 71 | kfree(repl); | ||
| 134 | if (IS_ERR(net->ipv4.iptable_filter)) | 72 | if (IS_ERR(net->ipv4.iptable_filter)) |
| 135 | return PTR_ERR(net->ipv4.iptable_filter); | 73 | return PTR_ERR(net->ipv4.iptable_filter); |
| 136 | return 0; | 74 | return 0; |
| @@ -138,7 +76,7 @@ static int __net_init iptable_filter_net_init(struct net *net) | |||
| 138 | 76 | ||
| 139 | static void __net_exit iptable_filter_net_exit(struct net *net) | 77 | static void __net_exit iptable_filter_net_exit(struct net *net) |
| 140 | { | 78 | { |
| 141 | ipt_unregister_table(net->ipv4.iptable_filter); | 79 | ipt_unregister_table(net, net->ipv4.iptable_filter); |
| 142 | } | 80 | } |
| 143 | 81 | ||
| 144 | static struct pernet_operations iptable_filter_net_ops = { | 82 | static struct pernet_operations iptable_filter_net_ops = { |
| @@ -151,21 +89,20 @@ static int __init iptable_filter_init(void) | |||
| 151 | int ret; | 89 | int ret; |
| 152 | 90 | ||
| 153 | if (forward < 0 || forward > NF_MAX_VERDICT) { | 91 | if (forward < 0 || forward > NF_MAX_VERDICT) { |
| 154 | printk("iptables forward must be 0 or 1\n"); | 92 | pr_err("iptables forward must be 0 or 1\n"); |
| 155 | return -EINVAL; | 93 | return -EINVAL; |
| 156 | } | 94 | } |
| 157 | 95 | ||
| 158 | /* Entry 1 is the FORWARD hook */ | ||
| 159 | initial_table.entries[1].target.verdict = -forward - 1; | ||
| 160 | |||
| 161 | ret = register_pernet_subsys(&iptable_filter_net_ops); | 96 | ret = register_pernet_subsys(&iptable_filter_net_ops); |
| 162 | if (ret < 0) | 97 | if (ret < 0) |
| 163 | return ret; | 98 | return ret; |
| 164 | 99 | ||
| 165 | /* Register hooks */ | 100 | /* Register hooks */ |
| 166 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 101 | filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); |
| 167 | if (ret < 0) | 102 | if (IS_ERR(filter_ops)) { |
| 103 | ret = PTR_ERR(filter_ops); | ||
| 168 | goto cleanup_table; | 104 | goto cleanup_table; |
| 105 | } | ||
| 169 | 106 | ||
| 170 | return ret; | 107 | return ret; |
| 171 | 108 | ||
| @@ -176,7 +113,7 @@ static int __init iptable_filter_init(void) | |||
| 176 | 113 | ||
| 177 | static void __exit iptable_filter_fini(void) | 114 | static void __exit iptable_filter_fini(void) |
| 178 | { | 115 | { |
| 179 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 116 | xt_hook_unlink(&packet_filter, filter_ops); |
| 180 | unregister_pernet_subsys(&iptable_filter_net_ops); | 117 | unregister_pernet_subsys(&iptable_filter_net_ops); |
| 181 | } | 118 | } |
| 182 | 119 | ||
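The iptable_filter rewrite is the template for the iptable_mangle rewrite that follows: the hand-rolled initial_table blob is replaced by ipt_alloc_initial_table(), which builds the bootstrap ruleset at runtime from the xt_table definition (hence the new .priority field); the three nearly identical per-hook wrappers collapse into one iptable_filter_hook(); and xt_hook_link() generates and registers one nf_hook_ops per bit set in .valid_hooks, returning an ERR_PTR on failure. The forward= module parameter is now patched into the freshly built blob in the per-netns init rather than into a static table, and ipt_unregister_table() grows a net argument to match. The new registration flow, condensed, with error paths trimmed and demo_table standing in for packet_filter:

    /*
     * Per-netns registration flow after the refactor, condensed from
     * the hunks above.
     */
    #include <linux/netfilter_ipv4.h>
    #include <linux/netfilter_ipv4/ip_tables.h>
    #include <linux/slab.h>

    #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
                                (1 << NF_INET_FORWARD) | \
                                (1 << NF_INET_LOCAL_OUT))

    static const struct xt_table demo_table = {
            .name           = "filter",
            .valid_hooks    = FILTER_VALID_HOOKS,
            .me             = THIS_MODULE,
            .af             = NFPROTO_IPV4,
            .priority       = NF_IP_PRI_FILTER, /* consumed by xt_hook_link() */
    };

    static int __net_init demo_net_init(struct net *net)
    {
            struct ipt_replace *repl = ipt_alloc_initial_table(&demo_table);

            if (repl == NULL)
                    return -ENOMEM;
            net->ipv4.iptable_filter =
                    ipt_register_table(net, &demo_table, repl);
            kfree(repl);    /* ipt_register_table() made its own copy */
            if (IS_ERR(net->ipv4.iptable_filter))
                    return PTR_ERR(net->ipv4.iptable_filter);
            return 0;
    }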
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index fae78c3076c4..294a2a32f293 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/netfilter_ipv4/ip_tables.h> | 12 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 13 | #include <linux/netdevice.h> | 13 | #include <linux/netdevice.h> |
| 14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
| 15 | #include <linux/slab.h> | ||
| 15 | #include <net/sock.h> | 16 | #include <net/sock.h> |
| 16 | #include <net/route.h> | 17 | #include <net/route.h> |
| 17 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
| @@ -27,101 +28,16 @@ MODULE_DESCRIPTION("iptables mangle table"); | |||
| 27 | (1 << NF_INET_LOCAL_OUT) | \ | 28 | (1 << NF_INET_LOCAL_OUT) | \ |
| 28 | (1 << NF_INET_POST_ROUTING)) | 29 | (1 << NF_INET_POST_ROUTING)) |
| 29 | 30 | ||
| 30 | /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ | ||
| 31 | static const struct | ||
| 32 | { | ||
| 33 | struct ipt_replace repl; | ||
| 34 | struct ipt_standard entries[5]; | ||
| 35 | struct ipt_error term; | ||
| 36 | } initial_table __net_initdata = { | ||
| 37 | .repl = { | ||
| 38 | .name = "mangle", | ||
| 39 | .valid_hooks = MANGLE_VALID_HOOKS, | ||
| 40 | .num_entries = 6, | ||
| 41 | .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), | ||
| 42 | .hook_entry = { | ||
| 43 | [NF_INET_PRE_ROUTING] = 0, | ||
| 44 | [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), | ||
| 45 | [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, | ||
| 46 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, | ||
| 47 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, | ||
| 48 | }, | ||
| 49 | .underflow = { | ||
| 50 | [NF_INET_PRE_ROUTING] = 0, | ||
| 51 | [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), | ||
| 52 | [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, | ||
| 53 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, | ||
| 54 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, | ||
| 55 | }, | ||
| 56 | }, | ||
| 57 | .entries = { | ||
| 58 | IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ | ||
| 59 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ | ||
| 60 | IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ | ||
| 61 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
| 62 | IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ | ||
| 63 | }, | ||
| 64 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
| 65 | }; | ||
| 66 | |||
| 67 | static const struct xt_table packet_mangler = { | 31 | static const struct xt_table packet_mangler = { |
| 68 | .name = "mangle", | 32 | .name = "mangle", |
| 69 | .valid_hooks = MANGLE_VALID_HOOKS, | 33 | .valid_hooks = MANGLE_VALID_HOOKS, |
| 70 | .me = THIS_MODULE, | 34 | .me = THIS_MODULE, |
| 71 | .af = NFPROTO_IPV4, | 35 | .af = NFPROTO_IPV4, |
| 36 | .priority = NF_IP_PRI_MANGLE, | ||
| 72 | }; | 37 | }; |
| 73 | 38 | ||
| 74 | /* The work comes in here from netfilter.c. */ | ||
| 75 | static unsigned int | ||
| 76 | ipt_pre_routing_hook(unsigned int hook, | ||
| 77 | struct sk_buff *skb, | ||
| 78 | const struct net_device *in, | ||
| 79 | const struct net_device *out, | ||
| 80 | int (*okfn)(struct sk_buff *)) | ||
| 81 | { | ||
| 82 | return ipt_do_table(skb, hook, in, out, | ||
| 83 | dev_net(in)->ipv4.iptable_mangle); | ||
| 84 | } | ||
| 85 | |||
| 86 | static unsigned int | ||
| 87 | ipt_post_routing_hook(unsigned int hook, | ||
| 88 | struct sk_buff *skb, | ||
| 89 | const struct net_device *in, | ||
| 90 | const struct net_device *out, | ||
| 91 | int (*okfn)(struct sk_buff *)) | ||
| 92 | { | ||
| 93 | return ipt_do_table(skb, hook, in, out, | ||
| 94 | dev_net(out)->ipv4.iptable_mangle); | ||
| 95 | } | ||
| 96 | |||
| 97 | static unsigned int | ||
| 98 | ipt_local_in_hook(unsigned int hook, | ||
| 99 | struct sk_buff *skb, | ||
| 100 | const struct net_device *in, | ||
| 101 | const struct net_device *out, | ||
| 102 | int (*okfn)(struct sk_buff *)) | ||
| 103 | { | ||
| 104 | return ipt_do_table(skb, hook, in, out, | ||
| 105 | dev_net(in)->ipv4.iptable_mangle); | ||
| 106 | } | ||
| 107 | |||
| 108 | static unsigned int | ||
| 109 | ipt_forward_hook(unsigned int hook, | ||
| 110 | struct sk_buff *skb, | ||
| 111 | const struct net_device *in, | ||
| 112 | const struct net_device *out, | ||
| 113 | int (*okfn)(struct sk_buff *)) | ||
| 114 | { | ||
| 115 | return ipt_do_table(skb, hook, in, out, | ||
| 116 | dev_net(in)->ipv4.iptable_mangle); | ||
| 117 | } | ||
| 118 | |||
| 119 | static unsigned int | 39 | static unsigned int |
| 120 | ipt_local_hook(unsigned int hook, | 40 | ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) |
| 121 | struct sk_buff *skb, | ||
| 122 | const struct net_device *in, | ||
| 123 | const struct net_device *out, | ||
| 124 | int (*okfn)(struct sk_buff *)) | ||
| 125 | { | 41 | { |
| 126 | unsigned int ret; | 42 | unsigned int ret; |
| 127 | const struct iphdr *iph; | 43 | const struct iphdr *iph; |
| @@ -141,7 +57,7 @@ ipt_local_hook(unsigned int hook, | |||
| 141 | daddr = iph->daddr; | 57 | daddr = iph->daddr; |
| 142 | tos = iph->tos; | 58 | tos = iph->tos; |
| 143 | 59 | ||
| 144 | ret = ipt_do_table(skb, hook, in, out, | 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, |
| 145 | dev_net(out)->ipv4.iptable_mangle); | 61 | dev_net(out)->ipv4.iptable_mangle); |
| 146 | /* Reroute for ANY change. */ | 62 | /* Reroute for ANY change. */ |
| 147 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 63 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { |
| @@ -158,49 +74,36 @@ ipt_local_hook(unsigned int hook, | |||
| 158 | return ret; | 74 | return ret; |
| 159 | } | 75 | } |
| 160 | 76 | ||
| 161 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 77 | /* The work comes in here from netfilter.c. */ |
| 162 | { | 78 | static unsigned int |
| 163 | .hook = ipt_pre_routing_hook, | 79 | iptable_mangle_hook(unsigned int hook, |
| 164 | .owner = THIS_MODULE, | 80 | struct sk_buff *skb, |
| 165 | .pf = NFPROTO_IPV4, | 81 | const struct net_device *in, |
| 166 | .hooknum = NF_INET_PRE_ROUTING, | 82 | const struct net_device *out, |
| 167 | .priority = NF_IP_PRI_MANGLE, | 83 | int (*okfn)(struct sk_buff *)) |
| 168 | }, | 84 | { |
| 169 | { | 85 | if (hook == NF_INET_LOCAL_OUT) |
| 170 | .hook = ipt_local_in_hook, | 86 | return ipt_mangle_out(skb, out); |
| 171 | .owner = THIS_MODULE, | 87 | if (hook == NF_INET_POST_ROUTING) |
| 172 | .pf = NFPROTO_IPV4, | 88 | return ipt_do_table(skb, hook, in, out, |
| 173 | .hooknum = NF_INET_LOCAL_IN, | 89 | dev_net(out)->ipv4.iptable_mangle); |
| 174 | .priority = NF_IP_PRI_MANGLE, | 90 | /* PREROUTING/INPUT/FORWARD: */ |
| 175 | }, | 91 | return ipt_do_table(skb, hook, in, out, |
| 176 | { | 92 | dev_net(in)->ipv4.iptable_mangle); |
| 177 | .hook = ipt_forward_hook, | 93 | } |
| 178 | .owner = THIS_MODULE, | 94 | |
| 179 | .pf = NFPROTO_IPV4, | 95 | static struct nf_hook_ops *mangle_ops __read_mostly; |
| 180 | .hooknum = NF_INET_FORWARD, | ||
| 181 | .priority = NF_IP_PRI_MANGLE, | ||
| 182 | }, | ||
| 183 | { | ||
| 184 | .hook = ipt_local_hook, | ||
| 185 | .owner = THIS_MODULE, | ||
| 186 | .pf = NFPROTO_IPV4, | ||
| 187 | .hooknum = NF_INET_LOCAL_OUT, | ||
| 188 | .priority = NF_IP_PRI_MANGLE, | ||
| 189 | }, | ||
| 190 | { | ||
| 191 | .hook = ipt_post_routing_hook, | ||
| 192 | .owner = THIS_MODULE, | ||
| 193 | .pf = NFPROTO_IPV4, | ||
| 194 | .hooknum = NF_INET_POST_ROUTING, | ||
| 195 | .priority = NF_IP_PRI_MANGLE, | ||
| 196 | }, | ||
| 197 | }; | ||
| 198 | 96 | ||
| 199 | static int __net_init iptable_mangle_net_init(struct net *net) | 97 | static int __net_init iptable_mangle_net_init(struct net *net) |
| 200 | { | 98 | { |
| 201 | /* Register table */ | 99 | struct ipt_replace *repl; |
| 100 | |||
| 101 | repl = ipt_alloc_initial_table(&packet_mangler); | ||
| 102 | if (repl == NULL) | ||
| 103 | return -ENOMEM; | ||
| 202 | net->ipv4.iptable_mangle = | 104 | net->ipv4.iptable_mangle = |
| 203 | ipt_register_table(net, &packet_mangler, &initial_table.repl); | 105 | ipt_register_table(net, &packet_mangler, repl); |
| 106 | kfree(repl); | ||
| 204 | if (IS_ERR(net->ipv4.iptable_mangle)) | 107 | if (IS_ERR(net->ipv4.iptable_mangle)) |
| 205 | return PTR_ERR(net->ipv4.iptable_mangle); | 108 | return PTR_ERR(net->ipv4.iptable_mangle); |
| 206 | return 0; | 109 | return 0; |
| @@ -208,7 +111,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) | |||
| 208 | 111 | ||
| 209 | static void __net_exit iptable_mangle_net_exit(struct net *net) | 112 | static void __net_exit iptable_mangle_net_exit(struct net *net) |
| 210 | { | 113 | { |
| 211 | ipt_unregister_table(net->ipv4.iptable_mangle); | 114 | ipt_unregister_table(net, net->ipv4.iptable_mangle); |
| 212 | } | 115 | } |
| 213 | 116 | ||
| 214 | static struct pernet_operations iptable_mangle_net_ops = { | 117 | static struct pernet_operations iptable_mangle_net_ops = { |
| @@ -225,9 +128,11 @@ static int __init iptable_mangle_init(void) | |||
| 225 | return ret; | 128 | return ret; |
| 226 | 129 | ||
| 227 | /* Register hooks */ | 130 | /* Register hooks */ |
| 228 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 131 | mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); |
| 229 | if (ret < 0) | 132 | if (IS_ERR(mangle_ops)) { |
| 133 | ret = PTR_ERR(mangle_ops); | ||
| 230 | goto cleanup_table; | 134 | goto cleanup_table; |
| 135 | } | ||
| 231 | 136 | ||
| 232 | return ret; | 137 | return ret; |
| 233 | 138 | ||
| @@ -238,7 +143,7 @@ static int __init iptable_mangle_init(void) | |||
| 238 | 143 | ||
| 239 | static void __exit iptable_mangle_fini(void) | 144 | static void __exit iptable_mangle_fini(void) |
| 240 | { | 145 | { |
| 241 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 146 | xt_hook_unlink(&packet_mangler, mangle_ops); |
| 242 | unregister_pernet_subsys(&iptable_mangle_net_ops); | 147 | unregister_pernet_subsys(&iptable_mangle_net_ops); |
| 243 | } | 148 | } |
| 244 | 149 | ||
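With five hooks funneled into one function, the hook number itself becomes the demultiplexer: LOCAL_OUT takes the reroute-checking slow path, POST_ROUTING must derive the namespace from the output device, and the remaining hooks use the input device. The reroute logic in ipt_mangle_out() amounts to snapshotting the routing-relevant fields before table traversal and re-routing if any changed — roughly, as a sketch with error handling elided:

    /* Snapshot the fields a mangle rule may rewrite, run the table,
     * and re-route the skb if any of them changed. */
    const struct iphdr *iph = ip_hdr(skb);
    __be32 saddr = iph->saddr;
    __be32 daddr = iph->daddr;
    __u8   tos   = iph->tos;
    u32    mark  = skb->mark;
    unsigned int ret;

    ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
                       dev_net(out)->ipv4.iptable_mangle);

    if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
        iph = ip_hdr(skb);
        if (iph->saddr != saddr || iph->daddr != daddr ||
            skb->mark != mark || iph->tos != tos)
            if (ip_route_me_harder(skb, RTN_UNSPEC))
                ret = NF_DROP;
    }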
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23be09..07fb710cd722 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
| @@ -5,94 +5,49 @@ | |||
| 5 | */ | 5 | */ |
| 6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
| 7 | #include <linux/netfilter_ipv4/ip_tables.h> | 7 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 8 | #include <linux/slab.h> | ||
| 8 | #include <net/ip.h> | 9 | #include <net/ip.h> |
| 9 | 10 | ||
| 10 | #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) | 11 | #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) |
| 11 | 12 | ||
| 12 | static const struct | ||
| 13 | { | ||
| 14 | struct ipt_replace repl; | ||
| 15 | struct ipt_standard entries[2]; | ||
| 16 | struct ipt_error term; | ||
| 17 | } initial_table __net_initdata = { | ||
| 18 | .repl = { | ||
| 19 | .name = "raw", | ||
| 20 | .valid_hooks = RAW_VALID_HOOKS, | ||
| 21 | .num_entries = 3, | ||
| 22 | .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), | ||
| 23 | .hook_entry = { | ||
| 24 | [NF_INET_PRE_ROUTING] = 0, | ||
| 25 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) | ||
| 26 | }, | ||
| 27 | .underflow = { | ||
| 28 | [NF_INET_PRE_ROUTING] = 0, | ||
| 29 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) | ||
| 30 | }, | ||
| 31 | }, | ||
| 32 | .entries = { | ||
| 33 | IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ | ||
| 34 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
| 35 | }, | ||
| 36 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
| 37 | }; | ||
| 38 | |||
| 39 | static const struct xt_table packet_raw = { | 13 | static const struct xt_table packet_raw = { |
| 40 | .name = "raw", | 14 | .name = "raw", |
| 41 | .valid_hooks = RAW_VALID_HOOKS, | 15 | .valid_hooks = RAW_VALID_HOOKS, |
| 42 | .me = THIS_MODULE, | 16 | .me = THIS_MODULE, |
| 43 | .af = NFPROTO_IPV4, | 17 | .af = NFPROTO_IPV4, |
| 18 | .priority = NF_IP_PRI_RAW, | ||
| 44 | }; | 19 | }; |
| 45 | 20 | ||
| 46 | /* The work comes in here from netfilter.c. */ | 21 | /* The work comes in here from netfilter.c. */ |
| 47 | static unsigned int | 22 | static unsigned int |
| 48 | ipt_hook(unsigned int hook, | 23 | iptable_raw_hook(unsigned int hook, struct sk_buff *skb, |
| 49 | struct sk_buff *skb, | 24 | const struct net_device *in, const struct net_device *out, |
| 50 | const struct net_device *in, | 25 | int (*okfn)(struct sk_buff *)) |
| 51 | const struct net_device *out, | ||
| 52 | int (*okfn)(struct sk_buff *)) | ||
| 53 | { | 26 | { |
| 54 | return ipt_do_table(skb, hook, in, out, | 27 | const struct net *net; |
| 55 | dev_net(in)->ipv4.iptable_raw); | ||
| 56 | } | ||
| 57 | 28 | ||
| 58 | static unsigned int | 29 | if (hook == NF_INET_LOCAL_OUT && |
| 59 | ipt_local_hook(unsigned int hook, | 30 | (skb->len < sizeof(struct iphdr) || |
| 60 | struct sk_buff *skb, | 31 | ip_hdrlen(skb) < sizeof(struct iphdr))) |
| 61 | const struct net_device *in, | 32 | /* root is playing with raw sockets. */ |
| 62 | const struct net_device *out, | ||
| 63 | int (*okfn)(struct sk_buff *)) | ||
| 64 | { | ||
| 65 | /* root is playing with raw sockets. */ | ||
| 66 | if (skb->len < sizeof(struct iphdr) || | ||
| 67 | ip_hdrlen(skb) < sizeof(struct iphdr)) | ||
| 68 | return NF_ACCEPT; | 33 | return NF_ACCEPT; |
| 69 | return ipt_do_table(skb, hook, in, out, | 34 | |
| 70 | dev_net(out)->ipv4.iptable_raw); | 35 | net = dev_net((in != NULL) ? in : out); |
| 36 | return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); | ||
| 71 | } | 37 | } |
| 72 | 38 | ||
| 73 | /* 'raw' is the very first table. */ | 39 | static struct nf_hook_ops *rawtable_ops __read_mostly; |
| 74 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | ||
| 75 | { | ||
| 76 | .hook = ipt_hook, | ||
| 77 | .pf = NFPROTO_IPV4, | ||
| 78 | .hooknum = NF_INET_PRE_ROUTING, | ||
| 79 | .priority = NF_IP_PRI_RAW, | ||
| 80 | .owner = THIS_MODULE, | ||
| 81 | }, | ||
| 82 | { | ||
| 83 | .hook = ipt_local_hook, | ||
| 84 | .pf = NFPROTO_IPV4, | ||
| 85 | .hooknum = NF_INET_LOCAL_OUT, | ||
| 86 | .priority = NF_IP_PRI_RAW, | ||
| 87 | .owner = THIS_MODULE, | ||
| 88 | }, | ||
| 89 | }; | ||
| 90 | 40 | ||
| 91 | static int __net_init iptable_raw_net_init(struct net *net) | 41 | static int __net_init iptable_raw_net_init(struct net *net) |
| 92 | { | 42 | { |
| 93 | /* Register table */ | 43 | struct ipt_replace *repl; |
| 44 | |||
| 45 | repl = ipt_alloc_initial_table(&packet_raw); | ||
| 46 | if (repl == NULL) | ||
| 47 | return -ENOMEM; | ||
| 94 | net->ipv4.iptable_raw = | 48 | net->ipv4.iptable_raw = |
| 95 | ipt_register_table(net, &packet_raw, &initial_table.repl); | 49 | ipt_register_table(net, &packet_raw, repl); |
| 50 | kfree(repl); | ||
| 96 | if (IS_ERR(net->ipv4.iptable_raw)) | 51 | if (IS_ERR(net->ipv4.iptable_raw)) |
| 97 | return PTR_ERR(net->ipv4.iptable_raw); | 52 | return PTR_ERR(net->ipv4.iptable_raw); |
| 98 | return 0; | 53 | return 0; |
| @@ -100,7 +55,7 @@ static int __net_init iptable_raw_net_init(struct net *net) | |||
| 100 | 55 | ||
| 101 | static void __net_exit iptable_raw_net_exit(struct net *net) | 56 | static void __net_exit iptable_raw_net_exit(struct net *net) |
| 102 | { | 57 | { |
| 103 | ipt_unregister_table(net->ipv4.iptable_raw); | 58 | ipt_unregister_table(net, net->ipv4.iptable_raw); |
| 104 | } | 59 | } |
| 105 | 60 | ||
| 106 | static struct pernet_operations iptable_raw_net_ops = { | 61 | static struct pernet_operations iptable_raw_net_ops = { |
| @@ -117,9 +72,11 @@ static int __init iptable_raw_init(void) | |||
| 117 | return ret; | 72 | return ret; |
| 118 | 73 | ||
| 119 | /* Register hooks */ | 74 | /* Register hooks */ |
| 120 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 75 | rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); |
| 121 | if (ret < 0) | 76 | if (IS_ERR(rawtable_ops)) { |
| 77 | ret = PTR_ERR(rawtable_ops); | ||
| 122 | goto cleanup_table; | 78 | goto cleanup_table; |
| 79 | } | ||
| 123 | 80 | ||
| 124 | return ret; | 81 | return ret; |
| 125 | 82 | ||
| @@ -130,7 +87,7 @@ static int __init iptable_raw_init(void) | |||
| 130 | 87 | ||
| 131 | static void __exit iptable_raw_fini(void) | 88 | static void __exit iptable_raw_fini(void) |
| 132 | { | 89 | { |
| 133 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 90 | xt_hook_unlink(&packet_raw, rawtable_ops); |
| 134 | unregister_pernet_subsys(&iptable_raw_net_ops); | 91 | unregister_pernet_subsys(&iptable_raw_net_ops); |
| 135 | } | 92 | } |
| 136 | 93 | ||
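One detail worth noting in the merged raw hook: at PRE_ROUTING only the input device is set, and at LOCAL_OUT only the output device, so the namespace must come from whichever one is non-NULL. The idiom generalizes to any hook function shared across ingress and egress hooks — a sketch:

    /* Pick the network namespace from whichever device this hook
     * invocation actually has: "in" on ingress hooks, "out" on egress. */
    static inline struct net *hook_net(const struct net_device *in,
                                       const struct net_device *out)
    {
        return dev_net(in != NULL ? in : out);
    }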
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 3bd3d6388da5..be45bdc4c602 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| 19 | #include <linux/netfilter_ipv4/ip_tables.h> | 19 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <net/ip.h> | 21 | #include <net/ip.h> |
| 21 | 22 | ||
| 22 | MODULE_LICENSE("GPL"); | 23 | MODULE_LICENSE("GPL"); |
| @@ -27,109 +28,44 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); | |||
| 27 | (1 << NF_INET_FORWARD) | \ | 28 | (1 << NF_INET_FORWARD) | \ |
| 28 | (1 << NF_INET_LOCAL_OUT) | 29 | (1 << NF_INET_LOCAL_OUT) |
| 29 | 30 | ||
| 30 | static const struct | ||
| 31 | { | ||
| 32 | struct ipt_replace repl; | ||
| 33 | struct ipt_standard entries[3]; | ||
| 34 | struct ipt_error term; | ||
| 35 | } initial_table __net_initdata = { | ||
| 36 | .repl = { | ||
| 37 | .name = "security", | ||
| 38 | .valid_hooks = SECURITY_VALID_HOOKS, | ||
| 39 | .num_entries = 4, | ||
| 40 | .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
| 41 | .hook_entry = { | ||
| 42 | [NF_INET_LOCAL_IN] = 0, | ||
| 43 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
| 44 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
| 45 | }, | ||
| 46 | .underflow = { | ||
| 47 | [NF_INET_LOCAL_IN] = 0, | ||
| 48 | [NF_INET_FORWARD] = sizeof(struct ipt_standard), | ||
| 49 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, | ||
| 50 | }, | ||
| 51 | }, | ||
| 52 | .entries = { | ||
| 53 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ | ||
| 54 | IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ | ||
| 55 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
| 56 | }, | ||
| 57 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
| 58 | }; | ||
| 59 | |||
| 60 | static const struct xt_table security_table = { | 31 | static const struct xt_table security_table = { |
| 61 | .name = "security", | 32 | .name = "security", |
| 62 | .valid_hooks = SECURITY_VALID_HOOKS, | 33 | .valid_hooks = SECURITY_VALID_HOOKS, |
| 63 | .me = THIS_MODULE, | 34 | .me = THIS_MODULE, |
| 64 | .af = NFPROTO_IPV4, | 35 | .af = NFPROTO_IPV4, |
| 36 | .priority = NF_IP_PRI_SECURITY, | ||
| 65 | }; | 37 | }; |
| 66 | 38 | ||
| 67 | static unsigned int | 39 | static unsigned int |
| 68 | ipt_local_in_hook(unsigned int hook, | 40 | iptable_security_hook(unsigned int hook, struct sk_buff *skb, |
| 69 | struct sk_buff *skb, | 41 | const struct net_device *in, |
| 70 | const struct net_device *in, | 42 | const struct net_device *out, |
| 71 | const struct net_device *out, | 43 | int (*okfn)(struct sk_buff *)) |
| 72 | int (*okfn)(struct sk_buff *)) | ||
| 73 | { | ||
| 74 | return ipt_do_table(skb, hook, in, out, | ||
| 75 | dev_net(in)->ipv4.iptable_security); | ||
| 76 | } | ||
| 77 | |||
| 78 | static unsigned int | ||
| 79 | ipt_forward_hook(unsigned int hook, | ||
| 80 | struct sk_buff *skb, | ||
| 81 | const struct net_device *in, | ||
| 82 | const struct net_device *out, | ||
| 83 | int (*okfn)(struct sk_buff *)) | ||
| 84 | { | 44 | { |
| 85 | return ipt_do_table(skb, hook, in, out, | 45 | const struct net *net; |
| 86 | dev_net(in)->ipv4.iptable_security); | ||
| 87 | } | ||
| 88 | 46 | ||
| 89 | static unsigned int | 47 | if (hook == NF_INET_LOCAL_OUT && |
| 90 | ipt_local_out_hook(unsigned int hook, | 48 | (skb->len < sizeof(struct iphdr) || |
| 91 | struct sk_buff *skb, | 49 | ip_hdrlen(skb) < sizeof(struct iphdr))) |
| 92 | const struct net_device *in, | 50 | /* Somebody is playing with raw sockets. */ |
| 93 | const struct net_device *out, | ||
| 94 | int (*okfn)(struct sk_buff *)) | ||
| 95 | { | ||
| 96 | /* Somebody is playing with raw sockets. */ | ||
| 97 | if (skb->len < sizeof(struct iphdr) || | ||
| 98 | ip_hdrlen(skb) < sizeof(struct iphdr)) | ||
| 99 | return NF_ACCEPT; | 51 | return NF_ACCEPT; |
| 100 | return ipt_do_table(skb, hook, in, out, | 52 | |
| 101 | dev_net(out)->ipv4.iptable_security); | 53 | net = dev_net((in != NULL) ? in : out); |
| 54 | return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); | ||
| 102 | } | 55 | } |
| 103 | 56 | ||
| 104 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 57 | static struct nf_hook_ops *sectbl_ops __read_mostly; |
| 105 | { | ||
| 106 | .hook = ipt_local_in_hook, | ||
| 107 | .owner = THIS_MODULE, | ||
| 108 | .pf = NFPROTO_IPV4, | ||
| 109 | .hooknum = NF_INET_LOCAL_IN, | ||
| 110 | .priority = NF_IP_PRI_SECURITY, | ||
| 111 | }, | ||
| 112 | { | ||
| 113 | .hook = ipt_forward_hook, | ||
| 114 | .owner = THIS_MODULE, | ||
| 115 | .pf = NFPROTO_IPV4, | ||
| 116 | .hooknum = NF_INET_FORWARD, | ||
| 117 | .priority = NF_IP_PRI_SECURITY, | ||
| 118 | }, | ||
| 119 | { | ||
| 120 | .hook = ipt_local_out_hook, | ||
| 121 | .owner = THIS_MODULE, | ||
| 122 | .pf = NFPROTO_IPV4, | ||
| 123 | .hooknum = NF_INET_LOCAL_OUT, | ||
| 124 | .priority = NF_IP_PRI_SECURITY, | ||
| 125 | }, | ||
| 126 | }; | ||
| 127 | 58 | ||
| 128 | static int __net_init iptable_security_net_init(struct net *net) | 59 | static int __net_init iptable_security_net_init(struct net *net) |
| 129 | { | 60 | { |
| 130 | net->ipv4.iptable_security = | 61 | struct ipt_replace *repl; |
| 131 | ipt_register_table(net, &security_table, &initial_table.repl); | ||
| 132 | 62 | ||
| 63 | repl = ipt_alloc_initial_table(&security_table); | ||
| 64 | if (repl == NULL) | ||
| 65 | return -ENOMEM; | ||
| 66 | net->ipv4.iptable_security = | ||
| 67 | ipt_register_table(net, &security_table, repl); | ||
| 68 | kfree(repl); | ||
| 133 | if (IS_ERR(net->ipv4.iptable_security)) | 69 | if (IS_ERR(net->ipv4.iptable_security)) |
| 134 | return PTR_ERR(net->ipv4.iptable_security); | 70 | return PTR_ERR(net->ipv4.iptable_security); |
| 135 | 71 | ||
| @@ -138,7 +74,7 @@ static int __net_init iptable_security_net_init(struct net *net) | |||
| 138 | 74 | ||
| 139 | static void __net_exit iptable_security_net_exit(struct net *net) | 75 | static void __net_exit iptable_security_net_exit(struct net *net) |
| 140 | { | 76 | { |
| 141 | ipt_unregister_table(net->ipv4.iptable_security); | 77 | ipt_unregister_table(net, net->ipv4.iptable_security); |
| 142 | } | 78 | } |
| 143 | 79 | ||
| 144 | static struct pernet_operations iptable_security_net_ops = { | 80 | static struct pernet_operations iptable_security_net_ops = { |
| @@ -154,9 +90,11 @@ static int __init iptable_security_init(void) | |||
| 154 | if (ret < 0) | 90 | if (ret < 0) |
| 155 | return ret; | 91 | return ret; |
| 156 | 92 | ||
| 157 | ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 93 | sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); |
| 158 | if (ret < 0) | 94 | if (IS_ERR(sectbl_ops)) { |
| 95 | ret = PTR_ERR(sectbl_ops); | ||
| 159 | goto cleanup_table; | 96 | goto cleanup_table; |
| 97 | } | ||
| 160 | 98 | ||
| 161 | return ret; | 99 | return ret; |
| 162 | 100 | ||
| @@ -167,7 +105,7 @@ cleanup_table: | |||
| 167 | 105 | ||
| 168 | static void __exit iptable_security_fini(void) | 106 | static void __exit iptable_security_fini(void) |
| 169 | { | 107 | { |
| 170 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 108 | xt_hook_unlink(&security_table, sectbl_ops); |
| 171 | unregister_pernet_subsys(&iptable_security_net_ops); | 109 | unregister_pernet_subsys(&iptable_security_net_ops); |
| 172 | } | 110 | } |
| 173 | 111 | ||
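All four tables also lose their hand-rolled initial_table blobs: ipt_alloc_initial_table() builds the equivalent ipt_replace image (one ACCEPT policy entry per valid hook plus the terminating error entry) from the xt_table definition at runtime, which is why each net-init now follows the same alloc/register/kfree sequence. A sketch of that pernet pattern, assuming a hypothetical per-net pointer analogous to the ones above:

    static int __net_init demo_net_init(struct net *net)
    {
        struct ipt_replace *repl;

        /* Build the default ruleset image from demo_table.valid_hooks. */
        repl = ipt_alloc_initial_table(&demo_table);
        if (repl == NULL)
            return -ENOMEM;

        /* iptable_demo is an illustrative per-net member, not a real one. */
        net->ipv4.iptable_demo = ipt_register_table(net, &demo_table, repl);
        /* ipt_register_table() copies the image, so it can go at once. */
        kfree(repl);

        if (IS_ERR(net->ipv4.iptable_demo))
            return PTR_ERR(net->ipv4.iptable_demo);
        return 0;
    }

    static void __net_exit demo_net_exit(struct net *net)
    {
        ipt_unregister_table(net, net->ipv4.iptable_demo);
    }

    static struct pernet_operations demo_net_ops = {
        .init = demo_net_init,
        .exit = demo_net_exit,
    };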
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..5a03c02af999 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <net/netfilter/nf_conntrack_helper.h> | 22 | #include <net/netfilter/nf_conntrack_helper.h> |
| 23 | #include <net/netfilter/nf_conntrack_l4proto.h> | 23 | #include <net/netfilter/nf_conntrack_l4proto.h> |
| 24 | #include <net/netfilter/nf_conntrack_l3proto.h> | 24 | #include <net/netfilter/nf_conntrack_l3proto.h> |
| 25 | #include <net/netfilter/nf_conntrack_zones.h> | ||
| 25 | #include <net/netfilter/nf_conntrack_core.h> | 26 | #include <net/netfilter/nf_conntrack_core.h> |
| 26 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 27 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
| 27 | #include <net/netfilter/nf_nat_helper.h> | 28 | #include <net/netfilter/nf_nat_helper.h> |
| @@ -210,7 +211,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
| 210 | }, | 211 | }, |
| 211 | { | 212 | { |
| 212 | .procname = "ip_conntrack_buckets", | 213 | .procname = "ip_conntrack_buckets", |
| 213 | .data = &nf_conntrack_htable_size, | 214 | .data = &init_net.ct.htable_size, |
| 214 | .maxlen = sizeof(unsigned int), | 215 | .maxlen = sizeof(unsigned int), |
| 215 | .mode = 0444, | 216 | .mode = 0444, |
| 216 | .proc_handler = proc_dointvec, | 217 | .proc_handler = proc_dointvec, |
| @@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
| 266 | return -EINVAL; | 267 | return -EINVAL; |
| 267 | } | 268 | } |
| 268 | 269 | ||
| 269 | h = nf_conntrack_find_get(sock_net(sk), &tuple); | 270 | h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); |
| 270 | if (h) { | 271 | if (h) { |
| 271 | struct sockaddr_in sin; | 272 | struct sockaddr_in sin; |
| 272 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | 273 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); |
| @@ -381,32 +382,32 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) | |||
| 381 | 382 | ||
| 382 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); | 383 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); |
| 383 | if (ret < 0) { | 384 | if (ret < 0) { |
| 384 | printk("nf_conntrack_ipv4: can't register tcp.\n"); | 385 | pr_err("nf_conntrack_ipv4: can't register tcp.\n"); |
| 385 | goto cleanup_sockopt; | 386 | goto cleanup_sockopt; |
| 386 | } | 387 | } |
| 387 | 388 | ||
| 388 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); | 389 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); |
| 389 | if (ret < 0) { | 390 | if (ret < 0) { |
| 390 | printk("nf_conntrack_ipv4: can't register udp.\n"); | 391 | pr_err("nf_conntrack_ipv4: can't register udp.\n"); |
| 391 | goto cleanup_tcp; | 392 | goto cleanup_tcp; |
| 392 | } | 393 | } |
| 393 | 394 | ||
| 394 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); | 395 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); |
| 395 | if (ret < 0) { | 396 | if (ret < 0) { |
| 396 | printk("nf_conntrack_ipv4: can't register icmp.\n"); | 397 | pr_err("nf_conntrack_ipv4: can't register icmp.\n"); |
| 397 | goto cleanup_udp; | 398 | goto cleanup_udp; |
| 398 | } | 399 | } |
| 399 | 400 | ||
| 400 | ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); | 401 | ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); |
| 401 | if (ret < 0) { | 402 | if (ret < 0) { |
| 402 | printk("nf_conntrack_ipv4: can't register ipv4\n"); | 403 | pr_err("nf_conntrack_ipv4: can't register ipv4\n"); |
| 403 | goto cleanup_icmp; | 404 | goto cleanup_icmp; |
| 404 | } | 405 | } |
| 405 | 406 | ||
| 406 | ret = nf_register_hooks(ipv4_conntrack_ops, | 407 | ret = nf_register_hooks(ipv4_conntrack_ops, |
| 407 | ARRAY_SIZE(ipv4_conntrack_ops)); | 408 | ARRAY_SIZE(ipv4_conntrack_ops)); |
| 408 | if (ret < 0) { | 409 | if (ret < 0) { |
| 409 | printk("nf_conntrack_ipv4: can't register hooks.\n"); | 410 | pr_err("nf_conntrack_ipv4: can't register hooks.\n"); |
| 410 | goto cleanup_ipv4; | 411 | goto cleanup_ipv4; |
| 411 | } | 412 | } |
| 412 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | 413 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) |
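The conntrack zone argument now threads through every tuple lookup; callers with no zone context, like the SO_ORIGINAL_DST getter above, pass NF_CT_DEFAULT_ZONE. A sketch of the lookup call as it now reads, with tuple setup elided:

    struct nf_conntrack_tuple_hash *h;
    struct nf_conn *ct;

    /* Lookups are keyed by (netns, zone, tuple) after this series. */
    h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
    if (h != NULL) {
        ct = nf_ct_tuplehash_to_ctrack(h);
        /* ... read ct->tuplehash[IP_CT_DIR_REPLY].tuple ... */
        nf_ct_put(ct);    /* drop the reference taken by find_get */
    }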
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..244f7cb08d68 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
| @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
| 32 | struct hlist_nulls_node *n; | 32 | struct hlist_nulls_node *n; |
| 33 | 33 | ||
| 34 | for (st->bucket = 0; | 34 | for (st->bucket = 0; |
| 35 | st->bucket < nf_conntrack_htable_size; | 35 | st->bucket < net->ct.htable_size; |
| 36 | st->bucket++) { | 36 | st->bucket++) { |
| 37 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 37 | n = rcu_dereference(net->ct.hash[st->bucket].first); |
| 38 | if (!is_a_nulls(n)) | 38 | if (!is_a_nulls(n)) |
| @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
| 50 | head = rcu_dereference(head->next); | 50 | head = rcu_dereference(head->next); |
| 51 | while (is_a_nulls(head)) { | 51 | while (is_a_nulls(head)) { |
| 52 | if (likely(get_nulls_value(head) == st->bucket)) { | 52 | if (likely(get_nulls_value(head) == st->bucket)) { |
| 53 | if (++st->bucket >= nf_conntrack_htable_size) | 53 | if (++st->bucket >= net->ct.htable_size) |
| 54 | return NULL; | 54 | return NULL; |
| 55 | } | 55 | } |
| 56 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 56 | head = rcu_dereference(net->ct.hash[st->bucket].first); |
| @@ -336,12 +336,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) | |||
| 336 | const struct ip_conntrack_stat *st = v; | 336 | const struct ip_conntrack_stat *st = v; |
| 337 | 337 | ||
| 338 | if (v == SEQ_START_TOKEN) { | 338 | if (v == SEQ_START_TOKEN) { |
| 339 | seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); | 339 | seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); |
| 340 | return 0; | 340 | return 0; |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " | 343 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " |
| 344 | "%08x %08x %08x %08x %08x %08x %08x %08x \n", | 344 | "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
| 345 | nr_conntracks, | 345 | nr_conntracks, |
| 346 | st->searched, | 346 | st->searched, |
| 347 | st->found, | 347 | st->found, |
| @@ -358,7 +358,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) | |||
| 358 | 358 | ||
| 359 | st->expect_new, | 359 | st->expect_new, |
| 360 | st->expect_create, | 360 | st->expect_create, |
| 361 | st->expect_delete | 361 | st->expect_delete, |
| 362 | st->search_restart | ||
| 362 | ); | 363 | ); |
| 363 | return 0; | 364 | return 0; |
| 364 | } | 365 | } |
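The /proc iterators switch from the global nf_conntrack_htable_size to the per-namespace net->ct.htable_size, since each netns now sizes its hash independently, and the per-cpu stats line gains a trailing search_restart column. The bucket walk these iterators perform has this shape — a sketch:

    /* Walk every bucket of this namespace's conntrack hash under RCU;
     * the per-net size must be used, as namespaces may differ. */
    unsigned int bucket;
    struct hlist_nulls_node *n;

    rcu_read_lock();
    for (bucket = 0; bucket < net->ct.htable_size; bucket++) {
        for (n = rcu_dereference(net->ct.hash[bucket].first);
             !is_a_nulls(n);
             n = rcu_dereference(n->next)) {
            /* ... inspect nf_ct_tuplehash_to_ctrack(...) ... */
        }
    }
    rcu_read_unlock();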
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7afd39b5b781..7404bde95994 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <net/netfilter/nf_conntrack_tuple.h> | 18 | #include <net/netfilter/nf_conntrack_tuple.h> |
| 19 | #include <net/netfilter/nf_conntrack_l4proto.h> | 19 | #include <net/netfilter/nf_conntrack_l4proto.h> |
| 20 | #include <net/netfilter/nf_conntrack_core.h> | 20 | #include <net/netfilter/nf_conntrack_core.h> |
| 21 | #include <net/netfilter/nf_conntrack_zones.h> | ||
| 21 | #include <net/netfilter/nf_log.h> | 22 | #include <net/netfilter/nf_log.h> |
| 22 | 23 | ||
| 23 | static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; | 24 | static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; |
| @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
| 114 | 115 | ||
| 115 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ | 116 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ |
| 116 | static int | 117 | static int |
| 117 | icmp_error_message(struct net *net, struct sk_buff *skb, | 118 | icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, |
| 118 | enum ip_conntrack_info *ctinfo, | 119 | enum ip_conntrack_info *ctinfo, |
| 119 | unsigned int hooknum) | 120 | unsigned int hooknum) |
| 120 | { | 121 | { |
| 121 | struct nf_conntrack_tuple innertuple, origtuple; | 122 | struct nf_conntrack_tuple innertuple, origtuple; |
| 122 | const struct nf_conntrack_l4proto *innerproto; | 123 | const struct nf_conntrack_l4proto *innerproto; |
| 123 | const struct nf_conntrack_tuple_hash *h; | 124 | const struct nf_conntrack_tuple_hash *h; |
| 125 | u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; | ||
| 124 | 126 | ||
| 125 | NF_CT_ASSERT(skb->nfct == NULL); | 127 | NF_CT_ASSERT(skb->nfct == NULL); |
| 126 | 128 | ||
| @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, | |||
| 146 | 148 | ||
| 147 | *ctinfo = IP_CT_RELATED; | 149 | *ctinfo = IP_CT_RELATED; |
| 148 | 150 | ||
| 149 | h = nf_conntrack_find_get(net, &innertuple); | 151 | h = nf_conntrack_find_get(net, zone, &innertuple); |
| 150 | if (!h) { | 152 | if (!h) { |
| 151 | pr_debug("icmp_error_message: no match\n"); | 153 | pr_debug("icmp_error_message: no match\n"); |
| 152 | return -NF_ACCEPT; | 154 | return -NF_ACCEPT; |
| @@ -163,7 +165,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, | |||
| 163 | 165 | ||
| 164 | /* Small and modified version of icmp_rcv */ | 166 | /* Small and modified version of icmp_rcv */ |
| 165 | static int | 167 | static int |
| 166 | icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, | 168 | icmp_error(struct net *net, struct nf_conn *tmpl, |
| 169 | struct sk_buff *skb, unsigned int dataoff, | ||
| 167 | enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) | 170 | enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) |
| 168 | { | 171 | { |
| 169 | const struct icmphdr *icmph; | 172 | const struct icmphdr *icmph; |
| @@ -208,7 +211,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, | |||
| 208 | icmph->type != ICMP_REDIRECT) | 211 | icmph->type != ICMP_REDIRECT) |
| 209 | return NF_ACCEPT; | 212 | return NF_ACCEPT; |
| 210 | 213 | ||
| 211 | return icmp_error_message(net, skb, ctinfo, hooknum); | 214 | return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); |
| 212 | } | 215 | } |
| 213 | 216 | ||
| 214 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 217 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
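icmp_error() and icmp_error_message() gain a tmpl argument: when a template conntrack has been attached to the skb (e.g. by a CT target), its zone must be used to find the connection the ICMP error refers to; otherwise the default zone applies. The derivation is the one-liner from the hunk above, in sketch form:

    /* Zone selection for ICMP error association: inherit the zone from
     * the template conntrack when one is attached, else use the default. */
    u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;

    h = nf_conntrack_find_get(net, zone, &innertuple);
    if (h == NULL)
        return -NF_ACCEPT;    /* no matching connection: let it pass */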
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3ebd1b..f3a9b42b16c6 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
| @@ -17,6 +17,10 @@ | |||
| 17 | #include <linux/netfilter_bridge.h> | 17 | #include <linux/netfilter_bridge.h> |
| 18 | #include <linux/netfilter_ipv4.h> | 18 | #include <linux/netfilter_ipv4.h> |
| 19 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | 19 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> |
| 20 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 21 | #include <net/netfilter/nf_conntrack.h> | ||
| 22 | #endif | ||
| 23 | #include <net/netfilter/nf_conntrack_zones.h> | ||
| 20 | 24 | ||
| 21 | /* Returns new sk_buff, or NULL */ | 25 | /* Returns new sk_buff, or NULL */ |
| 22 | static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | 26 | static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) |
| @@ -38,15 +42,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
| 38 | static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, | 42 | static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, |
| 39 | struct sk_buff *skb) | 43 | struct sk_buff *skb) |
| 40 | { | 44 | { |
| 45 | u16 zone = NF_CT_DEFAULT_ZONE; | ||
| 46 | |||
| 47 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 48 | if (skb->nfct) | ||
| 49 | zone = nf_ct_zone((struct nf_conn *)skb->nfct); | ||
| 50 | #endif | ||
| 51 | |||
| 41 | #ifdef CONFIG_BRIDGE_NETFILTER | 52 | #ifdef CONFIG_BRIDGE_NETFILTER |
| 42 | if (skb->nf_bridge && | 53 | if (skb->nf_bridge && |
| 43 | skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) | 54 | skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) |
| 44 | return IP_DEFRAG_CONNTRACK_BRIDGE_IN; | 55 | return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; |
| 45 | #endif | 56 | #endif |
| 46 | if (hooknum == NF_INET_PRE_ROUTING) | 57 | if (hooknum == NF_INET_PRE_ROUTING) |
| 47 | return IP_DEFRAG_CONNTRACK_IN; | 58 | return IP_DEFRAG_CONNTRACK_IN + zone; |
| 48 | else | 59 | else |
| 49 | return IP_DEFRAG_CONNTRACK_OUT; | 60 | return IP_DEFRAG_CONNTRACK_OUT + zone; |
| 50 | } | 61 | } |
| 51 | 62 | ||
| 52 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | 63 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, |
| @@ -55,11 +66,18 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | |||
| 55 | const struct net_device *out, | 66 | const struct net_device *out, |
| 56 | int (*okfn)(struct sk_buff *)) | 67 | int (*okfn)(struct sk_buff *)) |
| 57 | { | 68 | { |
| 69 | struct sock *sk = skb->sk; | ||
| 70 | struct inet_sock *inet = inet_sk(skb->sk); | ||
| 71 | |||
| 72 | if (sk && (sk->sk_family == PF_INET) && | ||
| 73 | inet->nodefrag) | ||
| 74 | return NF_ACCEPT; | ||
| 75 | |||
| 58 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 76 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
| 59 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) | 77 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) |
| 60 | /* Previously seen (loopback)? Ignore. Do this before | 78 | /* Previously seen (loopback)? Ignore. Do this before |
| 61 | fragment check. */ | 79 | fragment check. */ |
| 62 | if (skb->nfct) | 80 | if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) |
| 63 | return NF_ACCEPT; | 81 | return NF_ACCEPT; |
| 64 | #endif | 82 | #endif |
| 65 | #endif | 83 | #endif |
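Two independent changes land in the defrag hook. First, sockets that opt out of defragmentation (the nodefrag flag) are accepted without reassembly. Second, the defrag "user" key — which decides which reassembly queue a fragment joins — is offset by the conntrack zone, so fragments from different zones with colliding IP IDs can never be reassembled together. The key computation is effectively this (bridge case elided) — a sketch:

    /* Effective reassembly-queue key: base enum value plus the zone,
     * giving each (direction, zone) pair its own fragment queues. */
    u16 zone = NF_CT_DEFAULT_ZONE;

    if (skb->nfct)
        zone = nf_ct_zone((struct nf_conn *)skb->nfct);

    return (hooknum == NF_INET_PRE_ROUTING ?
            IP_DEFRAG_CONNTRACK_IN : IP_DEFRAG_CONNTRACK_OUT) + zone;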
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..8c8632d9b93c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
| 13 | #include <linux/timer.h> | 13 | #include <linux/timer.h> |
| 14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
| 15 | #include <linux/gfp.h> | ||
| 15 | #include <net/checksum.h> | 16 | #include <net/checksum.h> |
| 16 | #include <net/icmp.h> | 17 | #include <net/icmp.h> |
| 17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
| @@ -30,14 +31,12 @@ | |||
| 30 | #include <net/netfilter/nf_conntrack_helper.h> | 31 | #include <net/netfilter/nf_conntrack_helper.h> |
| 31 | #include <net/netfilter/nf_conntrack_l3proto.h> | 32 | #include <net/netfilter/nf_conntrack_l3proto.h> |
| 32 | #include <net/netfilter/nf_conntrack_l4proto.h> | 33 | #include <net/netfilter/nf_conntrack_l4proto.h> |
| 34 | #include <net/netfilter/nf_conntrack_zones.h> | ||
| 33 | 35 | ||
| 34 | static DEFINE_SPINLOCK(nf_nat_lock); | 36 | static DEFINE_SPINLOCK(nf_nat_lock); |
| 35 | 37 | ||
| 36 | static struct nf_conntrack_l3proto *l3proto __read_mostly; | 38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; |
| 37 | 39 | ||
| 38 | /* Calculated at init based on memory size */ | ||
| 39 | static unsigned int nf_nat_htable_size __read_mostly; | ||
| 40 | |||
| 41 | #define MAX_IP_NAT_PROTO 256 | 40 | #define MAX_IP_NAT_PROTO 256 |
| 42 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] | 41 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] |
| 43 | __read_mostly; | 42 | __read_mostly; |
| @@ -72,15 +71,16 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); | |||
| 72 | 71 | ||
| 73 | /* We keep an extra hash for each conntrack, for fast searching. */ | 72 | /* We keep an extra hash for each conntrack, for fast searching. */ |
| 74 | static inline unsigned int | 73 | static inline unsigned int |
| 75 | hash_by_src(const struct nf_conntrack_tuple *tuple) | 74 | hash_by_src(const struct net *net, u16 zone, |
| 75 | const struct nf_conntrack_tuple *tuple) | ||
| 76 | { | 76 | { |
| 77 | unsigned int hash; | 77 | unsigned int hash; |
| 78 | 78 | ||
| 79 | /* Original src, to ensure we map it consistently if poss. */ | 79 | /* Original src, to ensure we map it consistently if poss. */ |
| 80 | hash = jhash_3words((__force u32)tuple->src.u3.ip, | 80 | hash = jhash_3words((__force u32)tuple->src.u3.ip, |
| 81 | (__force u32)tuple->src.u.all, | 81 | (__force u32)tuple->src.u.all ^ zone, |
| 82 | tuple->dst.protonum, 0); | 82 | tuple->dst.protonum, 0); |
| 83 | return ((u64)hash * nf_nat_htable_size) >> 32; | 83 | return ((u64)hash * net->ipv4.nat_htable_size) >> 32; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /* Is this tuple already taken? (not by us) */ | 86 | /* Is this tuple already taken? (not by us) */ |
| @@ -142,12 +142,12 @@ same_src(const struct nf_conn *ct, | |||
| 142 | 142 | ||
| 143 | /* Only called for SRC manip */ | 143 | /* Only called for SRC manip */ |
| 144 | static int | 144 | static int |
| 145 | find_appropriate_src(struct net *net, | 145 | find_appropriate_src(struct net *net, u16 zone, |
| 146 | const struct nf_conntrack_tuple *tuple, | 146 | const struct nf_conntrack_tuple *tuple, |
| 147 | struct nf_conntrack_tuple *result, | 147 | struct nf_conntrack_tuple *result, |
| 148 | const struct nf_nat_range *range) | 148 | const struct nf_nat_range *range) |
| 149 | { | 149 | { |
| 150 | unsigned int h = hash_by_src(tuple); | 150 | unsigned int h = hash_by_src(net, zone, tuple); |
| 151 | const struct nf_conn_nat *nat; | 151 | const struct nf_conn_nat *nat; |
| 152 | const struct nf_conn *ct; | 152 | const struct nf_conn *ct; |
| 153 | const struct hlist_node *n; | 153 | const struct hlist_node *n; |
| @@ -155,7 +155,7 @@ find_appropriate_src(struct net *net, | |||
| 155 | rcu_read_lock(); | 155 | rcu_read_lock(); |
| 156 | hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { | 156 | hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { |
| 157 | ct = nat->ct; | 157 | ct = nat->ct; |
| 158 | if (same_src(ct, tuple)) { | 158 | if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { |
| 159 | /* Copy source part from reply tuple. */ | 159 | /* Copy source part from reply tuple. */ |
| 160 | nf_ct_invert_tuplepr(result, | 160 | nf_ct_invert_tuplepr(result, |
| 161 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | 161 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); |
| @@ -178,7 +178,7 @@ find_appropriate_src(struct net *net, | |||
| 178 | the ip with the lowest src-ip/dst-ip/proto usage. | 178 | the ip with the lowest src-ip/dst-ip/proto usage. |
| 179 | */ | 179 | */ |
| 180 | static void | 180 | static void |
| 181 | find_best_ips_proto(struct nf_conntrack_tuple *tuple, | 181 | find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, |
| 182 | const struct nf_nat_range *range, | 182 | const struct nf_nat_range *range, |
| 183 | const struct nf_conn *ct, | 183 | const struct nf_conn *ct, |
| 184 | enum nf_nat_manip_type maniptype) | 184 | enum nf_nat_manip_type maniptype) |
| @@ -212,7 +212,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, | |||
| 212 | maxip = ntohl(range->max_ip); | 212 | maxip = ntohl(range->max_ip); |
| 213 | j = jhash_2words((__force u32)tuple->src.u3.ip, | 213 | j = jhash_2words((__force u32)tuple->src.u3.ip, |
| 214 | range->flags & IP_NAT_RANGE_PERSISTENT ? | 214 | range->flags & IP_NAT_RANGE_PERSISTENT ? |
| 215 | 0 : (__force u32)tuple->dst.u3.ip, 0); | 215 | 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); |
| 216 | j = ((u64)j * (maxip - minip + 1)) >> 32; | 216 | j = ((u64)j * (maxip - minip + 1)) >> 32; |
| 217 | *var_ipp = htonl(minip + j); | 217 | *var_ipp = htonl(minip + j); |
| 218 | } | 218 | } |
| @@ -232,6 +232,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 232 | { | 232 | { |
| 233 | struct net *net = nf_ct_net(ct); | 233 | struct net *net = nf_ct_net(ct); |
| 234 | const struct nf_nat_protocol *proto; | 234 | const struct nf_nat_protocol *proto; |
| 235 | u16 zone = nf_ct_zone(ct); | ||
| 235 | 236 | ||
| 236 | /* 1) If this srcip/proto/src-proto-part is currently mapped, | 237 | /* 1) If this srcip/proto/src-proto-part is currently mapped, |
| 237 | and that same mapping gives a unique tuple within the given | 238 | and that same mapping gives a unique tuple within the given |
| @@ -242,7 +243,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 242 | manips not an issue. */ | 243 | manips not an issue. */ |
| 243 | if (maniptype == IP_NAT_MANIP_SRC && | 244 | if (maniptype == IP_NAT_MANIP_SRC && |
| 244 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 245 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
| 245 | if (find_appropriate_src(net, orig_tuple, tuple, range)) { | 246 | if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { |
| 246 | pr_debug("get_unique_tuple: Found current src map\n"); | 247 | pr_debug("get_unique_tuple: Found current src map\n"); |
| 247 | if (!nf_nat_used_tuple(tuple, ct)) | 248 | if (!nf_nat_used_tuple(tuple, ct)) |
| 248 | return; | 249 | return; |
| @@ -252,7 +253,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 252 | /* 2) Select the least-used IP/proto combination in the given | 253 | /* 2) Select the least-used IP/proto combination in the given |
| 253 | range. */ | 254 | range. */ |
| 254 | *tuple = *orig_tuple; | 255 | *tuple = *orig_tuple; |
| 255 | find_best_ips_proto(tuple, range, ct, maniptype); | 256 | find_best_ips_proto(zone, tuple, range, ct, maniptype); |
| 256 | 257 | ||
| 257 | /* 3) The per-protocol part of the manip is made to map into | 258 | /* 3) The per-protocol part of the manip is made to map into |
| 258 | the range to make a unique tuple. */ | 259 | the range to make a unique tuple. */ |
| @@ -260,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 260 | rcu_read_lock(); | 261 | rcu_read_lock(); |
| 261 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); | 262 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); |
| 262 | 263 | ||
| 263 | /* Change protocol info to have some randomization */ | ||
| 264 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { | ||
| 265 | proto->unique_tuple(tuple, range, maniptype, ct); | ||
| 266 | goto out; | ||
| 267 | } | ||
| 268 | |||
| 269 | /* Only bother mapping if it's not already in range and unique */ | 264 | /* Only bother mapping if it's not already in range and unique */ |
| 270 | if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || | 265 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && |
| 266 | (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || | ||
| 271 | proto->in_range(tuple, maniptype, &range->min, &range->max)) && | 267 | proto->in_range(tuple, maniptype, &range->min, &range->max)) && |
| 272 | !nf_nat_used_tuple(tuple, ct)) | 268 | !nf_nat_used_tuple(tuple, ct)) |
| 273 | goto out; | 269 | goto out; |
| @@ -330,7 +326,8 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 330 | if (have_to_hash) { | 326 | if (have_to_hash) { |
| 331 | unsigned int srchash; | 327 | unsigned int srchash; |
| 332 | 328 | ||
| 333 | srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 329 | srchash = hash_by_src(net, nf_ct_zone(ct), |
| 330 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
| 334 | spin_lock_bh(&nf_nat_lock); | 331 | spin_lock_bh(&nf_nat_lock); |
| 335 | /* nf_conntrack_alter_reply might re-allocate extension area */ | 332 | /* nf_conntrack_alter_reply might re-allocate extension area */ |
| 336 | nat = nfct_nat(ct); | 333 | nat = nfct_nat(ct); |
| 336 | nat = nfct_nat(ct); | 333 | nat = nfct_nat(ct); |
| @@ -438,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
| 438 | if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) | 435 | if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) |
| 439 | return 0; | 436 | return 0; |
| 440 | 437 | ||
| 441 | inside = (void *)skb->data + ip_hdrlen(skb); | 438 | inside = (void *)skb->data + hdrlen; |
| 442 | 439 | ||
| 443 | /* We're actually going to mangle it beyond trivial checksum | 440 | /* We're actually going to mangle it beyond trivial checksum |
| 444 | adjustment, so make sure the current checksum is correct. */ | 441 | adjustment, so make sure the current checksum is correct. */ |
| @@ -468,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
| 468 | /* rcu_read_lock()ed by nf_hook_slow */ | 465 | /* rcu_read_lock()ed by nf_hook_slow */ |
| 469 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); | 466 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); |
| 470 | 467 | ||
| 471 | if (!nf_ct_get_tuple(skb, | 468 | if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), |
| 472 | ip_hdrlen(skb) + sizeof(struct icmphdr), | 469 | (hdrlen + |
| 473 | (ip_hdrlen(skb) + | ||
| 474 | sizeof(struct icmphdr) + inside->ip.ihl * 4), | 470 | sizeof(struct icmphdr) + inside->ip.ihl * 4), |
| 475 | (u_int16_t)AF_INET, | 471 | (u_int16_t)AF_INET, inside->ip.protocol, |
| 476 | inside->ip.protocol, | ||
| 477 | &inner, l3proto, l4proto)) | 472 | &inner, l3proto, l4proto)) |
| 478 | return 0; | 473 | return 0; |
| 479 | 474 | ||
| @@ -482,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
| 482 | pass all hooks (locally-generated ICMP). Consider incoming | 477 | pass all hooks (locally-generated ICMP). Consider incoming |
| 483 | packet: PREROUTING (DST manip), routing produces ICMP, goes | 478 | packet: PREROUTING (DST manip), routing produces ICMP, goes |
| 484 | through POSTROUTING (which must correct the DST manip). */ | 479 | through POSTROUTING (which must correct the DST manip). */ |
| 485 | if (!manip_pkt(inside->ip.protocol, skb, | 480 | if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), |
| 486 | ip_hdrlen(skb) + sizeof(inside->icmp), | 481 | &ct->tuplehash[!dir].tuple, !manip)) |
| 487 | &ct->tuplehash[!dir].tuple, | ||
| 488 | !manip)) | ||
| 489 | return 0; | 482 | return 0; |
| 490 | 483 | ||
| 491 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | 484 | if (skb->ip_summed != CHECKSUM_PARTIAL) { |
| 492 | /* Reloading "inside" here since manip_pkt inner. */ | 485 | /* Reloading "inside" here since manip_pkt inner. */ |
| 493 | inside = (void *)skb->data + ip_hdrlen(skb); | 486 | inside = (void *)skb->data + hdrlen; |
| 494 | inside->icmp.checksum = 0; | 487 | inside->icmp.checksum = 0; |
| 495 | inside->icmp.checksum = | 488 | inside->icmp.checksum = |
| 496 | csum_fold(skb_checksum(skb, hdrlen, | 489 | csum_fold(skb_checksum(skb, hdrlen, |
| @@ -679,8 +672,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, | |||
| 679 | 672 | ||
| 680 | static int __net_init nf_nat_net_init(struct net *net) | 673 | static int __net_init nf_nat_net_init(struct net *net) |
| 681 | { | 674 | { |
| 682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, | 675 | /* Leave them the same for the moment. */ |
| 683 | &net->ipv4.nat_vmalloced, 0); | 676 | net->ipv4.nat_htable_size = net->ct.htable_size; |
| 677 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | ||
| 678 | &net->ipv4.nat_vmalloced, 0); | ||
| 684 | if (!net->ipv4.nat_bysource) | 679 | if (!net->ipv4.nat_bysource) |
| 685 | return -ENOMEM; | 680 | return -ENOMEM; |
| 686 | return 0; | 681 | return 0; |
| @@ -703,7 +698,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
| 703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 698 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
| 704 | synchronize_rcu(); | 699 | synchronize_rcu(); |
| 705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 700 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, |
| 706 | nf_nat_htable_size); | 701 | net->ipv4.nat_htable_size); |
| 707 | } | 702 | } |
| 708 | 703 | ||
| 709 | static struct pernet_operations nf_nat_net_ops = { | 704 | static struct pernet_operations nf_nat_net_ops = { |
| @@ -724,9 +719,6 @@ static int __init nf_nat_init(void) | |||
| 724 | return ret; | 719 | return ret; |
| 725 | } | 720 | } |
| 726 | 721 | ||
| 727 | /* Leave them the same for the moment. */ | ||
| 728 | nf_nat_htable_size = nf_conntrack_htable_size; | ||
| 729 | |||
| 730 | ret = register_pernet_subsys(&nf_nat_net_ops); | 722 | ret = register_pernet_subsys(&nf_nat_net_ops); |
| 731 | if (ret < 0) | 723 | if (ret < 0) |
| 732 | goto cleanup_extend; | 724 | goto cleanup_extend; |
| @@ -741,7 +733,7 @@ static int __init nf_nat_init(void) | |||
| 741 | spin_unlock_bh(&nf_nat_lock); | 733 | spin_unlock_bh(&nf_nat_lock); |
| 742 | 734 | ||
| 743 | /* Initialize fake conntrack so that NAT will skip it */ | 735 | /* Initialize fake conntrack so that NAT will skip it */ |
| 744 | nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; | 736 | nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); |
| 745 | 737 | ||
| 746 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); | 738 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); |
| 747 | 739 | ||
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index a1d5d58a58bf..86e0e84ff0a0 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c | |||
| @@ -27,76 +27,29 @@ MODULE_ALIAS("ip_nat_ftp"); | |||
| 27 | 27 | ||
| 28 | /* FIXME: Time out? --RR */ | 28 | /* FIXME: Time out? --RR */ |
| 29 | 29 | ||
| 30 | static int | 30 | static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, |
| 31 | mangle_rfc959_packet(struct sk_buff *skb, | 31 | char *buffer, size_t buflen, |
| 32 | __be32 newip, | 32 | __be32 addr, u16 port) |
| 33 | u_int16_t port, | ||
| 34 | unsigned int matchoff, | ||
| 35 | unsigned int matchlen, | ||
| 36 | struct nf_conn *ct, | ||
| 37 | enum ip_conntrack_info ctinfo) | ||
| 38 | { | 33 | { |
| 39 | char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; | 34 | switch (type) { |
| 40 | 35 | case NF_CT_FTP_PORT: | |
| 41 | sprintf(buffer, "%u,%u,%u,%u,%u,%u", | 36 | case NF_CT_FTP_PASV: |
| 42 | NIPQUAD(newip), port>>8, port&0xFF); | 37 | return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", |
| 43 | 38 | ((unsigned char *)&addr)[0], | |
| 44 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | 39 | ((unsigned char *)&addr)[1], |
| 45 | 40 | ((unsigned char *)&addr)[2], | |
| 46 | return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | 41 | ((unsigned char *)&addr)[3], |
| 47 | matchlen, buffer, strlen(buffer)); | 42 | port >> 8, |
| 48 | } | 43 | port & 0xFF); |
| 49 | 44 | case NF_CT_FTP_EPRT: | |
| 50 | /* |1|132.235.1.2|6275| */ | 45 | return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); |
| 51 | static int | 46 | case NF_CT_FTP_EPSV: |
| 52 | mangle_eprt_packet(struct sk_buff *skb, | 47 | return snprintf(buffer, buflen, "|||%u|", port); |
| 53 | __be32 newip, | 48 | } |
| 54 | u_int16_t port, | ||
| 55 | unsigned int matchoff, | ||
| 56 | unsigned int matchlen, | ||
| 57 | struct nf_conn *ct, | ||
| 58 | enum ip_conntrack_info ctinfo) | ||
| 59 | { | ||
| 60 | char buffer[sizeof("|1|255.255.255.255|65535|")]; | ||
| 61 | |||
| 62 | sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); | ||
| 63 | |||
| 64 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
| 65 | |||
| 66 | return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | ||
| 67 | matchlen, buffer, strlen(buffer)); | ||
| 68 | } | ||
| 69 | |||
| 70 | /* |1|132.235.1.2|6275| */ | ||
| 71 | static int | ||
| 72 | mangle_epsv_packet(struct sk_buff *skb, | ||
| 73 | __be32 newip, | ||
| 74 | u_int16_t port, | ||
| 75 | unsigned int matchoff, | ||
| 76 | unsigned int matchlen, | ||
| 77 | struct nf_conn *ct, | ||
| 78 | enum ip_conntrack_info ctinfo) | ||
| 79 | { | ||
| 80 | char buffer[sizeof("|||65535|")]; | ||
| 81 | |||
| 82 | sprintf(buffer, "|||%u|", port); | ||
| 83 | |||
| 84 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
| 85 | 49 | ||
| 86 | return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | 50 | return 0; |
| 87 | matchlen, buffer, strlen(buffer)); | ||
| 88 | } | 51 | } |
| 89 | 52 | ||
| 90 | static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, | ||
| 91 | unsigned int, unsigned int, struct nf_conn *, | ||
| 92 | enum ip_conntrack_info) | ||
| 93 | = { | ||
| 94 | [NF_CT_FTP_PORT] = mangle_rfc959_packet, | ||
| 95 | [NF_CT_FTP_PASV] = mangle_rfc959_packet, | ||
| 96 | [NF_CT_FTP_EPRT] = mangle_eprt_packet, | ||
| 97 | [NF_CT_FTP_EPSV] = mangle_epsv_packet | ||
| 98 | }; | ||
| 99 | |||
| 100 | /* So, this packet has hit the connection tracking matching code. | 53 | /* So, this packet has hit the connection tracking matching code. |
| 101 | Mangle it, and change the expectation to match the new version. */ | 54 | Mangle it, and change the expectation to match the new version. */ |
| 102 | static unsigned int nf_nat_ftp(struct sk_buff *skb, | 55 | static unsigned int nf_nat_ftp(struct sk_buff *skb, |
| @@ -110,6 +63,8 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, | |||
| 110 | u_int16_t port; | 63 | u_int16_t port; |
| 111 | int dir = CTINFO2DIR(ctinfo); | 64 | int dir = CTINFO2DIR(ctinfo); |
| 112 | struct nf_conn *ct = exp->master; | 65 | struct nf_conn *ct = exp->master; |
| 66 | char buffer[sizeof("|1|255.255.255.255|65535|")]; | ||
| 67 | unsigned int buflen; | ||
| 113 | 68 | ||
| 114 | pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); | 69 | pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); |
| 115 | 70 | ||
| @@ -132,11 +87,21 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, | |||
| 132 | if (port == 0) | 87 | if (port == 0) |
| 133 | return NF_DROP; | 88 | return NF_DROP; |
| 134 | 89 | ||
| 135 | if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { | 90 | buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); |
| 136 | nf_ct_unexpect_related(exp); | 91 | if (!buflen) |
| 137 | return NF_DROP; | 92 | goto out; |
| 138 | } | 93 | |
| 94 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
| 95 | |||
| 96 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | ||
| 97 | matchlen, buffer, buflen)) | ||
| 98 | goto out; | ||
| 99 | |||
| 139 | return NF_ACCEPT; | 100 | return NF_ACCEPT; |
| 101 | |||
| 102 | out: | ||
| 103 | nf_ct_unexpect_related(exp); | ||
| 104 | return NF_DROP; | ||
| 140 | } | 105 | } |
| 141 | 106 | ||
| 142 | static void __exit nf_nat_ftp_fini(void) | 107 | static void __exit nf_nat_ftp_fini(void) |
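
Note: the three per-command mangle functions removed above collapse into a single formatter, nf_nat_ftp_fmt_cmd(), on the right-hand side. The same formatting logic can be exercised in plain userspace C; in this sketch, fmt_ftp_cmd(), the CMD_* constants and the sample address are illustrative stand-ins, not the kernel code itself:

        #include <stdio.h>

        /* Illustrative stand-ins for the kernel's NF_CT_FTP_* values. */
        enum cmd_type { CMD_PORT, CMD_PASV, CMD_EPRT, CMD_EPSV };

        /* Sketch of the consolidated formatter: one switch covering all
         * four encodings, returning the formatted length (0 = unknown). */
        static int fmt_ftp_cmd(enum cmd_type type, char *buf, int len,
                               const unsigned char a[4], unsigned int port)
        {
                switch (type) {
                case CMD_PORT:
                case CMD_PASV:  /* PORT argument / 227 reply: a,a,a,a,p-hi,p-lo */
                        return snprintf(buf, len, "%u,%u,%u,%u,%u,%u",
                                        a[0], a[1], a[2], a[3],
                                        port >> 8, port & 0xFF);
                case CMD_EPRT:  /* EPRT argument: |1|a.b.c.d|port| */
                        return snprintf(buf, len, "|1|%u.%u.%u.%u|%u|",
                                        a[0], a[1], a[2], a[3], port);
                case CMD_EPSV:  /* 229 reply: |||port| */
                        return snprintf(buf, len, "|||%u|", port);
                }
                return 0;
        }

        int main(void)
        {
                const unsigned char ip[4] = { 132, 235, 1, 2 };
                char buf[sizeof("|1|255.255.255.255|65535|")];

                fmt_ftp_cmd(CMD_PASV, buf, sizeof(buf), ip, 6275);
                printf("PASV: %s\n", buf);      /* 132,235,1,2,24,131 */
                fmt_ftp_cmd(CMD_EPRT, buf, sizeof(buf), ip, 6275);
                printf("EPRT: %s\n", buf);      /* |1|132.235.1.2|6275| */
                fmt_ftp_cmd(CMD_EPSV, buf, sizeof(buf), ip, 6275);
                printf("EPSV: %s\n", buf);      /* |||6275| */
                return 0;
        }

Port 6275 encodes as 24,131 in the PORT/PASV form (6275 = 24 * 256 + 131), which is what the six-group buffer sizing accounts for.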
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 7e8e6fc75413..5045196d853c 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 13 | #include <linux/moduleparam.h> | ||
| 14 | #include <linux/tcp.h> | 13 | #include <linux/tcp.h> |
| 15 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
| 16 | 15 | ||
| @@ -44,7 +43,7 @@ static int set_addr(struct sk_buff *skb, | |||
| 44 | addroff, sizeof(buf), | 43 | addroff, sizeof(buf), |
| 45 | (char *) &buf, sizeof(buf))) { | 44 | (char *) &buf, sizeof(buf))) { |
| 46 | if (net_ratelimit()) | 45 | if (net_ratelimit()) |
| 47 | printk("nf_nat_h323: nf_nat_mangle_tcp_packet" | 46 | pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet" |
| 48 | " error\n"); | 47 | " error\n"); |
| 49 | return -1; | 48 | return -1; |
| 50 | } | 49 | } |
| @@ -60,7 +59,7 @@ static int set_addr(struct sk_buff *skb, | |||
| 60 | addroff, sizeof(buf), | 59 | addroff, sizeof(buf), |
| 61 | (char *) &buf, sizeof(buf))) { | 60 | (char *) &buf, sizeof(buf))) { |
| 62 | if (net_ratelimit()) | 61 | if (net_ratelimit()) |
| 63 | printk("nf_nat_h323: nf_nat_mangle_udp_packet" | 62 | pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet" |
| 64 | " error\n"); | 63 | " error\n"); |
| 65 | return -1; | 64 | return -1; |
| 66 | } | 65 | } |
| @@ -216,7 +215,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
| 216 | /* Run out of expectations */ | 215 | /* Run out of expectations */ |
| 217 | if (i >= H323_RTP_CHANNEL_MAX) { | 216 | if (i >= H323_RTP_CHANNEL_MAX) { |
| 218 | if (net_ratelimit()) | 217 | if (net_ratelimit()) |
| 219 | printk("nf_nat_h323: out of expectations\n"); | 218 | pr_notice("nf_nat_h323: out of expectations\n"); |
| 220 | return 0; | 219 | return 0; |
| 221 | } | 220 | } |
| 222 | 221 | ||
| @@ -235,7 +234,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
| 235 | 234 | ||
| 236 | if (nated_port == 0) { /* No port available */ | 235 | if (nated_port == 0) { /* No port available */ |
| 237 | if (net_ratelimit()) | 236 | if (net_ratelimit()) |
| 238 | printk("nf_nat_h323: out of RTP ports\n"); | 237 | pr_notice("nf_nat_h323: out of RTP ports\n"); |
| 239 | return 0; | 238 | return 0; |
| 240 | } | 239 | } |
| 241 | 240 | ||
| @@ -292,7 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
| 292 | 291 | ||
| 293 | if (nated_port == 0) { /* No port available */ | 292 | if (nated_port == 0) { /* No port available */ |
| 294 | if (net_ratelimit()) | 293 | if (net_ratelimit()) |
| 295 | printk("nf_nat_h323: out of TCP ports\n"); | 294 | pr_notice("nf_nat_h323: out of TCP ports\n"); |
| 296 | return 0; | 295 | return 0; |
| 297 | } | 296 | } |
| 298 | 297 | ||
| @@ -342,7 +341,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
| 342 | 341 | ||
| 343 | if (nated_port == 0) { /* No port available */ | 342 | if (nated_port == 0) { /* No port available */ |
| 344 | if (net_ratelimit()) | 343 | if (net_ratelimit()) |
| 345 | printk("nf_nat_q931: out of TCP ports\n"); | 344 | pr_notice("nf_nat_q931: out of TCP ports\n"); |
| 346 | return 0; | 345 | return 0; |
| 347 | } | 346 | } |
| 348 | 347 | ||
| @@ -426,7 +425,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
| 426 | 425 | ||
| 427 | if (nated_port == 0) { /* No port available */ | 426 | if (nated_port == 0) { /* No port available */ |
| 428 | if (net_ratelimit()) | 427 | if (net_ratelimit()) |
| 429 | printk("nf_nat_ras: out of TCP ports\n"); | 428 | pr_notice("nf_nat_ras: out of TCP ports\n"); |
| 430 | return 0; | 429 | return 0; |
| 431 | } | 430 | } |
| 432 | 431 | ||
| @@ -508,7 +507,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
| 508 | 507 | ||
| 509 | if (nated_port == 0) { /* No port available */ | 508 | if (nated_port == 0) { /* No port available */ |
| 510 | if (net_ratelimit()) | 509 | if (net_ratelimit()) |
| 511 | printk("nf_nat_q931: out of TCP ports\n"); | 510 | pr_notice("nf_nat_q931: out of TCP ports\n"); |
| 512 | return 0; | 511 | return 0; |
| 513 | } | 512 | } |
| 514 | 513 | ||
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 7f10a6be0191..4a0c6b548eee 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
| 9 | */ | 9 | */ |
| 10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
| 11 | #include <linux/gfp.h> | ||
| 11 | #include <linux/kmod.h> | 12 | #include <linux/kmod.h> |
| 12 | #include <linux/types.h> | 13 | #include <linux/types.h> |
| 13 | #include <linux/timer.h> | 14 | #include <linux/timer.h> |
| @@ -141,6 +142,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) | |||
| 141 | return 1; | 142 | return 1; |
| 142 | } | 143 | } |
| 143 | 144 | ||
| 145 | void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, | ||
| 146 | __be32 seq, s16 off) | ||
| 147 | { | ||
| 148 | if (!off) | ||
| 149 | return; | ||
| 150 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); | ||
| 151 | adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); | ||
| 152 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); | ||
| 153 | } | ||
| 154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); | ||
| 155 | |||
| 144 | /* Generic function for mangling variable-length address changes inside | 156 | /* Generic function for mangling variable-length address changes inside |
| 145 | * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX | 157 | * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX |
| 146 | * command in FTP). | 158 | * command in FTP). |
| @@ -149,14 +161,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) | |||
| 149 | * skb enlargement, ... | 161 | * skb enlargement, ... |
| 150 | * | 162 | * |
| 151 | * */ | 163 | * */ |
| 152 | int | 164 | int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, |
| 153 | nf_nat_mangle_tcp_packet(struct sk_buff *skb, | 165 | struct nf_conn *ct, |
| 154 | struct nf_conn *ct, | 166 | enum ip_conntrack_info ctinfo, |
| 155 | enum ip_conntrack_info ctinfo, | 167 | unsigned int match_offset, |
| 156 | unsigned int match_offset, | 168 | unsigned int match_len, |
| 157 | unsigned int match_len, | 169 | const char *rep_buffer, |
| 158 | const char *rep_buffer, | 170 | unsigned int rep_len, bool adjust) |
| 159 | unsigned int rep_len) | ||
| 160 | { | 171 | { |
| 161 | struct rtable *rt = skb_rtable(skb); | 172 | struct rtable *rt = skb_rtable(skb); |
| 162 | struct iphdr *iph; | 173 | struct iphdr *iph; |
| @@ -202,16 +213,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, | |||
| 202 | inet_proto_csum_replace2(&tcph->check, skb, | 213 | inet_proto_csum_replace2(&tcph->check, skb, |
| 203 | htons(oldlen), htons(datalen), 1); | 214 | htons(oldlen), htons(datalen), 1); |
| 204 | 215 | ||
| 205 | if (rep_len != match_len) { | 216 | if (adjust && rep_len != match_len) |
| 206 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); | 217 | nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, |
| 207 | adjust_tcp_sequence(ntohl(tcph->seq), | 218 | (int)rep_len - (int)match_len); |
| 208 | (int)rep_len - (int)match_len, | 219 | |
| 209 | ct, ctinfo); | ||
| 210 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); | ||
| 211 | } | ||
| 212 | return 1; | 220 | return 1; |
| 213 | } | 221 | } |
| 214 | EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); | 222 | EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); |
| 215 | 223 | ||
| 216 | /* Generic function for mangling variable-length address changes inside | 224 | /* Generic function for mangling variable-length address changes inside |
| 217 | * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX | 225 | * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX |
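
Note: the exported nf_nat_set_seq_adjust() and the adjust flag on __nf_nat_mangle_tcp_packet() let a helper rewrite several fields inside one TCP segment and register the sequence-number adjustment once, with the accumulated offset, rather than once per rewrite; the SIP conversion further down depends on this. Below is a minimal userspace model of that bookkeeping. struct pkt, mangle() and the SDP-flavoured strings are illustrative; only the accumulate-then-apply-once pattern mirrors the kernel change:

        #include <stdio.h>
        #include <string.h>

        /* Models a TCP payload plus the per-connection seq-adjust state. */
        struct pkt {
                char data[256];
                int  len;
                int  seq_off;   /* accumulated length delta */
        };

        /* Replace data[off..off+mlen) with rep[0..rlen); returns the
         * length delta so the caller can accumulate it (the kernel
         * analogue is __nf_nat_mangle_tcp_packet(..., adjust = false)). */
        static int mangle(struct pkt *p, int off, int mlen,
                          const char *rep, int rlen)
        {
                memmove(p->data + off + rlen, p->data + off + mlen,
                        p->len - off - mlen);
                memcpy(p->data + off, rep, rlen);
                p->len += rlen - mlen;
                return rlen - mlen;
        }

        int main(void)
        {
                struct pkt p = { .seq_off = 0 };
                int off = 0;

                p.len = sprintf(p.data, "v=0 o=10.0.0.1 c=10.0.0.1");

                /* Two rewrites in one segment; no adjustment registered
                 * yet.  The second offset accounts for the first rewrite
                 * having grown the payload by one byte. */
                off += mangle(&p, 4, 10, "o=192.0.2.1", 11);
                off += mangle(&p, 16, 10, "c=192.0.2.1", 11);

                /* One adjustment with the net offset, as in
                 * nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off). */
                p.seq_off += off;

                p.data[p.len] = '\0';
                printf("%s (seq_off %+d)\n", p.data, p.seq_off);
                return 0;
        }

In kernel terms the shape is: call __nf_nat_mangle_tcp_packet(..., false) per field, then nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off) once at the end.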
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb171056c63..4c060038d29f 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <net/netfilter/nf_nat_rule.h> | 25 | #include <net/netfilter/nf_nat_rule.h> |
| 26 | #include <net/netfilter/nf_conntrack_helper.h> | 26 | #include <net/netfilter/nf_conntrack_helper.h> |
| 27 | #include <net/netfilter/nf_conntrack_expect.h> | 27 | #include <net/netfilter/nf_conntrack_expect.h> |
| 28 | #include <net/netfilter/nf_conntrack_zones.h> | ||
| 28 | #include <linux/netfilter/nf_conntrack_proto_gre.h> | 29 | #include <linux/netfilter/nf_conntrack_proto_gre.h> |
| 29 | #include <linux/netfilter/nf_conntrack_pptp.h> | 30 | #include <linux/netfilter/nf_conntrack_pptp.h> |
| 30 | 31 | ||
| @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
| 74 | 75 | ||
| 75 | pr_debug("trying to unexpect other dir: "); | 76 | pr_debug("trying to unexpect other dir: "); |
| 76 | nf_ct_dump_tuple_ip(&t); | 77 | nf_ct_dump_tuple_ip(&t); |
| 77 | other_exp = nf_ct_expect_find_get(net, &t); | 78 | other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); |
| 78 | if (other_exp) { | 79 | if (other_exp) { |
| 79 | nf_ct_unexpect_related(other_exp); | 80 | nf_ct_unexpect_related(other_exp); |
| 80 | nf_ct_expect_put(other_exp); | 81 | nf_ct_expect_put(other_exp); |
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f51446..3e61faf23a9a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c | |||
| @@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 34 | } | 34 | } |
| 35 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); | 35 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); |
| 36 | 36 | ||
| 37 | bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | 37 | void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 38 | const struct nf_nat_range *range, | 38 | const struct nf_nat_range *range, |
| 39 | enum nf_nat_manip_type maniptype, | 39 | enum nf_nat_manip_type maniptype, |
| 40 | const struct nf_conn *ct, | 40 | const struct nf_conn *ct, |
| @@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 53 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | 53 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { |
| 54 | /* If it's dst rewrite, can't change port */ | 54 | /* If it's dst rewrite, can't change port */ |
| 55 | if (maniptype == IP_NAT_MANIP_DST) | 55 | if (maniptype == IP_NAT_MANIP_DST) |
| 56 | return false; | 56 | return; |
| 57 | 57 | ||
| 58 | if (ntohs(*portptr) < 1024) { | 58 | if (ntohs(*portptr) < 1024) { |
| 59 | /* Loose convention: >> 512 is credential passing */ | 59 | /* Loose convention: >> 512 is credential passing */ |
| @@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 81 | else | 81 | else |
| 82 | off = *rover; | 82 | off = *rover; |
| 83 | 83 | ||
| 84 | for (i = 0; i < range_size; i++, off++) { | 84 | for (i = 0; ; ++off) { |
| 85 | *portptr = htons(min + off % range_size); | 85 | *portptr = htons(min + off % range_size); |
| 86 | if (nf_nat_used_tuple(tuple, ct)) | 86 | if (++i != range_size && nf_nat_used_tuple(tuple, ct)) |
| 87 | continue; | 87 | continue; |
| 88 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) | 88 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) |
| 89 | *rover = off; | 89 | *rover = off; |
| 90 | return true; | 90 | return; |
| 91 | } | 91 | } |
| 92 | return false; | 92 | return; |
| 93 | } | 93 | } |
| 94 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); | 94 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); |
| 95 | 95 | ||
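
Note: besides becoming void, the search loop changes behaviour on exhaustion. It still probes at most range_size candidates starting at the rover, but the final probe is returned whether or not nf_nat_used_tuple() reports a clash, since ++i == range_size short-circuits the check; presumably handing back a possibly clashing tuple was judged better than failing outright. A userspace model of the probe order, with used[], pick_port() and the 8-port range as illustrative stand-ins:

        #include <stdio.h>
        #include <stdbool.h>

        #define MIN_PORT   1024
        #define RANGE_SIZE 8

        static bool used[RANGE_SIZE];   /* stand-in for nf_nat_used_tuple() */
        static unsigned int rover;      /* stand-in for *rover */

        /* Probe at most RANGE_SIZE candidates starting at the rover; if
         * every port is in use, the last probe is returned anyway. */
        static unsigned int pick_port(void)
        {
                unsigned int i, off = rover, port;

                for (i = 0; ; ++off) {
                        port = MIN_PORT + off % RANGE_SIZE;
                        if (++i != RANGE_SIZE && used[port - MIN_PORT])
                                continue;
                        rover = off;
                        return port;
                }
        }

        int main(void)
        {
                unsigned int i;

                for (i = 0; i < RANGE_SIZE; i++)
                        used[i] = true;
                used[5] = false;                    /* one free slot */

                printf("picked %u\n", pick_port()); /* 1029, the free one */

                used[5] = true;                     /* range now exhausted */
                printf("picked %u\n", pick_port()); /* 1028: last probe,
                                                       in use or not */
                return 0;
        }

The GRE key and ICMP id loops below are rewritten to the same shape.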
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 22485ce306d4..570faf2667b2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c | |||
| @@ -22,14 +22,14 @@ | |||
| 22 | 22 | ||
| 23 | static u_int16_t dccp_port_rover; | 23 | static u_int16_t dccp_port_rover; |
| 24 | 24 | ||
| 25 | static bool | 25 | static void |
| 26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, | 26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 27 | const struct nf_nat_range *range, | 27 | const struct nf_nat_range *range, |
| 28 | enum nf_nat_manip_type maniptype, | 28 | enum nf_nat_manip_type maniptype, |
| 29 | const struct nf_conn *ct) | 29 | const struct nf_conn *ct) |
| 30 | { | 30 | { |
| 31 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 31 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
| 32 | &dccp_port_rover); | 32 | &dccp_port_rover); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | static bool | 35 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d7e89201351e..bc8d83a31c73 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
| @@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | |||
| 37 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | 37 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); |
| 38 | 38 | ||
| 39 | /* generate unique tuple ... */ | 39 | /* generate unique tuple ... */ |
| 40 | static bool | 40 | static void |
| 41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, | 41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 42 | const struct nf_nat_range *range, | 42 | const struct nf_nat_range *range, |
| 43 | enum nf_nat_manip_type maniptype, | 43 | enum nf_nat_manip_type maniptype, |
| @@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 50 | /* If there is no master conntrack we are not PPTP, | 50 | /* If there is no master conntrack we are not PPTP, |
| 51 | do not change tuples */ | 51 | do not change tuples */ |
| 52 | if (!ct->master) | 52 | if (!ct->master) |
| 53 | return false; | 53 | return; |
| 54 | 54 | ||
| 55 | if (maniptype == IP_NAT_MANIP_SRC) | 55 | if (maniptype == IP_NAT_MANIP_SRC) |
| 56 | keyptr = &tuple->src.u.gre.key; | 56 | keyptr = &tuple->src.u.gre.key; |
| @@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 68 | 68 | ||
| 69 | pr_debug("min = %u, range_size = %u\n", min, range_size); | 69 | pr_debug("min = %u, range_size = %u\n", min, range_size); |
| 70 | 70 | ||
| 71 | for (i = 0; i < range_size; i++, key++) { | 71 | for (i = 0; ; ++key) { |
| 72 | *keyptr = htons(min + key % range_size); | 72 | *keyptr = htons(min + key % range_size); |
| 73 | if (!nf_nat_used_tuple(tuple, ct)) | 73 | if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) |
| 74 | return true; | 74 | return; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | pr_debug("%p: no NAT mapping\n", ct); | 77 | pr_debug("%p: no NAT mapping\n", ct); |
| 78 | return false; | 78 | return; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | /* manipulate a GRE packet according to maniptype */ | 81 | /* manipulate a GRE packet according to maniptype */ |
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 19a8b0b07d8e..5744c3ec847c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
| @@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 27 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); | 27 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | static bool | 30 | static void |
| 31 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | 31 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 32 | const struct nf_nat_range *range, | 32 | const struct nf_nat_range *range, |
| 33 | enum nf_nat_manip_type maniptype, | 33 | enum nf_nat_manip_type maniptype, |
| @@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 42 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) | 42 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) |
| 43 | range_size = 0xFFFF; | 43 | range_size = 0xFFFF; |
| 44 | 44 | ||
| 45 | for (i = 0; i < range_size; i++, id++) { | 45 | for (i = 0; ; ++id) { |
| 46 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + | 46 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + |
| 47 | (id % range_size)); | 47 | (id % range_size)); |
| 48 | if (!nf_nat_used_tuple(tuple, ct)) | 48 | if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) |
| 49 | return true; | 49 | return; |
| 50 | } | 50 | } |
| 51 | return false; | 51 | return; |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | static bool | 54 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 3fc598eeeb1a..756331d42661 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c | |||
| @@ -16,14 +16,14 @@ | |||
| 16 | 16 | ||
| 17 | static u_int16_t nf_sctp_port_rover; | 17 | static u_int16_t nf_sctp_port_rover; |
| 18 | 18 | ||
| 19 | static bool | 19 | static void |
| 20 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, | 20 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 21 | const struct nf_nat_range *range, | 21 | const struct nf_nat_range *range, |
| 22 | enum nf_nat_manip_type maniptype, | 22 | enum nf_nat_manip_type maniptype, |
| 23 | const struct nf_conn *ct) | 23 | const struct nf_conn *ct) |
| 24 | { | 24 | { |
| 25 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 25 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
| 26 | &nf_sctp_port_rover); | 26 | &nf_sctp_port_rover); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | static bool | 29 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 399e2cfa263b..aa460a595d5d 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c | |||
| @@ -20,14 +20,13 @@ | |||
| 20 | 20 | ||
| 21 | static u_int16_t tcp_port_rover; | 21 | static u_int16_t tcp_port_rover; |
| 22 | 22 | ||
| 23 | static bool | 23 | static void |
| 24 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, | 24 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 25 | const struct nf_nat_range *range, | 25 | const struct nf_nat_range *range, |
| 26 | enum nf_nat_manip_type maniptype, | 26 | enum nf_nat_manip_type maniptype, |
| 27 | const struct nf_conn *ct) | 27 | const struct nf_conn *ct) |
| 28 | { | 28 | { |
| 29 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 29 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); |
| 30 | &tcp_port_rover); | ||
| 31 | } | 30 | } |
| 32 | 31 | ||
| 33 | static bool | 32 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 9e61c79492e4..dfe65c7e2925 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c | |||
| @@ -19,14 +19,13 @@ | |||
| 19 | 19 | ||
| 20 | static u_int16_t udp_port_rover; | 20 | static u_int16_t udp_port_rover; |
| 21 | 21 | ||
| 22 | static bool | 22 | static void |
| 23 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, | 23 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 24 | const struct nf_nat_range *range, | 24 | const struct nf_nat_range *range, |
| 25 | enum nf_nat_manip_type maniptype, | 25 | enum nf_nat_manip_type maniptype, |
| 26 | const struct nf_conn *ct) | 26 | const struct nf_conn *ct) |
| 27 | { | 27 | { |
| 28 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 28 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); |
| 29 | &udp_port_rover); | ||
| 30 | } | 29 | } |
| 31 | 30 | ||
| 32 | static bool | 31 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 440a229bbd87..3cc8c8af39ef 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c | |||
| @@ -18,14 +18,14 @@ | |||
| 18 | 18 | ||
| 19 | static u_int16_t udplite_port_rover; | 19 | static u_int16_t udplite_port_rover; |
| 20 | 20 | ||
| 21 | static bool | 21 | static void |
| 22 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, | 22 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 23 | const struct nf_nat_range *range, | 23 | const struct nf_nat_range *range, |
| 24 | enum nf_nat_manip_type maniptype, | 24 | enum nf_nat_manip_type maniptype, |
| 25 | const struct nf_conn *ct) | 25 | const struct nf_conn *ct) |
| 26 | { | 26 | { |
| 27 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 27 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
| 28 | &udplite_port_rover); | 28 | &udplite_port_rover); |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | static bool | 31 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index 14381c62acea..a50f2bc1c732 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c | |||
| @@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 26 | return true; | 26 | return true; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, | 29 | static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 30 | const struct nf_nat_range *range, | 30 | const struct nf_nat_range *range, |
| 31 | enum nf_nat_manip_type maniptype, | 31 | enum nf_nat_manip_type maniptype, |
| 32 | const struct nf_conn *ct) | 32 | const struct nf_conn *ct) |
| 33 | { | 33 | { |
| 34 | /* Sorry: we can't help you; if it's not unique, we can't frob | 34 | /* Sorry: we can't help you; if it's not unique, we can't frob |
| 35 | anything. */ | 35 | anything. */ |
| 36 | return false; | 36 | return; |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | static bool | 39 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0dfb4ec..ebbd319f62f5 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | /* Everything about the rules for NAT. */ | 9 | /* Everything about the rules for NAT. */ |
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 10 | #include <linux/types.h> | 11 | #include <linux/types.h> |
| 11 | #include <linux/ip.h> | 12 | #include <linux/ip.h> |
| 12 | #include <linux/netfilter.h> | 13 | #include <linux/netfilter.h> |
| @@ -15,6 +16,7 @@ | |||
| 15 | #include <linux/kmod.h> | 16 | #include <linux/kmod.h> |
| 16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
| 17 | #include <linux/proc_fs.h> | 18 | #include <linux/proc_fs.h> |
| 19 | #include <linux/slab.h> | ||
| 18 | #include <net/checksum.h> | 20 | #include <net/checksum.h> |
| 19 | #include <net/route.h> | 21 | #include <net/route.h> |
| 20 | #include <linux/bitops.h> | 22 | #include <linux/bitops.h> |
| @@ -26,37 +28,8 @@ | |||
| 26 | 28 | ||
| 27 | #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ | 29 | #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ |
| 28 | (1 << NF_INET_POST_ROUTING) | \ | 30 | (1 << NF_INET_POST_ROUTING) | \ |
| 29 | (1 << NF_INET_LOCAL_OUT)) | 31 | (1 << NF_INET_LOCAL_OUT) | \ |
| 30 | 32 | (1 << NF_INET_LOCAL_IN)) | |
| 31 | static const struct | ||
| 32 | { | ||
| 33 | struct ipt_replace repl; | ||
| 34 | struct ipt_standard entries[3]; | ||
| 35 | struct ipt_error term; | ||
| 36 | } nat_initial_table __net_initdata = { | ||
| 37 | .repl = { | ||
| 38 | .name = "nat", | ||
| 39 | .valid_hooks = NAT_VALID_HOOKS, | ||
| 40 | .num_entries = 4, | ||
| 41 | .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
| 42 | .hook_entry = { | ||
| 43 | [NF_INET_PRE_ROUTING] = 0, | ||
| 44 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), | ||
| 45 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 | ||
| 46 | }, | ||
| 47 | .underflow = { | ||
| 48 | [NF_INET_PRE_ROUTING] = 0, | ||
| 49 | [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), | ||
| 50 | [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 | ||
| 51 | }, | ||
| 52 | }, | ||
| 53 | .entries = { | ||
| 54 | IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ | ||
| 55 | IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ | ||
| 56 | IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ | ||
| 57 | }, | ||
| 58 | .term = IPT_ERROR_INIT, /* ERROR */ | ||
| 59 | }; | ||
| 60 | 33 | ||
| 61 | static const struct xt_table nat_table = { | 34 | static const struct xt_table nat_table = { |
| 62 | .name = "nat", | 35 | .name = "nat", |
| @@ -67,13 +40,14 @@ static const struct xt_table nat_table = { | |||
| 67 | 40 | ||
| 68 | /* Source NAT */ | 41 | /* Source NAT */ |
| 69 | static unsigned int | 42 | static unsigned int |
| 70 | ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) | 43 | ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) |
| 71 | { | 44 | { |
| 72 | struct nf_conn *ct; | 45 | struct nf_conn *ct; |
| 73 | enum ip_conntrack_info ctinfo; | 46 | enum ip_conntrack_info ctinfo; |
| 74 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 47 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
| 75 | 48 | ||
| 76 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || |
| 50 | par->hooknum == NF_INET_LOCAL_IN); | ||
| 77 | 51 | ||
| 78 | ct = nf_ct_get(skb, &ctinfo); | 52 | ct = nf_ct_get(skb, &ctinfo); |
| 79 | 53 | ||
| @@ -86,7 +60,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 86 | } | 60 | } |
| 87 | 61 | ||
| 88 | static unsigned int | 62 | static unsigned int |
| 89 | ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) | 63 | ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) |
| 90 | { | 64 | { |
| 91 | struct nf_conn *ct; | 65 | struct nf_conn *ct; |
| 92 | enum ip_conntrack_info ctinfo; | 66 | enum ip_conntrack_info ctinfo; |
| @@ -103,31 +77,31 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) | |||
| 103 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); | 77 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); |
| 104 | } | 78 | } |
| 105 | 79 | ||
| 106 | static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) | 80 | static int ipt_snat_checkentry(const struct xt_tgchk_param *par) |
| 107 | { | 81 | { |
| 108 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 82 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
| 109 | 83 | ||
| 110 | /* Must be a valid range */ | 84 | /* Must be a valid range */ |
| 111 | if (mr->rangesize != 1) { | 85 | if (mr->rangesize != 1) { |
| 112 | printk("SNAT: multiple ranges no longer supported\n"); | 86 | pr_info("SNAT: multiple ranges no longer supported\n"); |
| 113 | return false; | 87 | return -EINVAL; |
| 114 | } | 88 | } |
| 115 | return true; | 89 | return 0; |
| 116 | } | 90 | } |
| 117 | 91 | ||
| 118 | static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) | 92 | static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) |
| 119 | { | 93 | { |
| 120 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 94 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
| 121 | 95 | ||
| 122 | /* Must be a valid range */ | 96 | /* Must be a valid range */ |
| 123 | if (mr->rangesize != 1) { | 97 | if (mr->rangesize != 1) { |
| 124 | printk("DNAT: multiple ranges no longer supported\n"); | 98 | pr_info("DNAT: multiple ranges no longer supported\n"); |
| 125 | return false; | 99 | return -EINVAL; |
| 126 | } | 100 | } |
| 127 | return true; | 101 | return 0; |
| 128 | } | 102 | } |
| 129 | 103 | ||
| 130 | unsigned int | 104 | static unsigned int |
| 131 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | 105 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) |
| 132 | { | 106 | { |
| 133 | /* Force range to this IP; let proto decide mapping for | 107 | /* Force range to this IP; let proto decide mapping for |
| @@ -169,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { | |||
| 169 | .target = ipt_snat_target, | 143 | .target = ipt_snat_target, |
| 170 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 144 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
| 171 | .table = "nat", | 145 | .table = "nat", |
| 172 | .hooks = 1 << NF_INET_POST_ROUTING, | 146 | .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), |
| 173 | .checkentry = ipt_snat_checkentry, | 147 | .checkentry = ipt_snat_checkentry, |
| 174 | .family = AF_INET, | 148 | .family = AF_INET, |
| 175 | }; | 149 | }; |
| @@ -186,8 +160,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = { | |||
| 186 | 160 | ||
| 187 | static int __net_init nf_nat_rule_net_init(struct net *net) | 161 | static int __net_init nf_nat_rule_net_init(struct net *net) |
| 188 | { | 162 | { |
| 189 | net->ipv4.nat_table = ipt_register_table(net, &nat_table, | 163 | struct ipt_replace *repl; |
| 190 | &nat_initial_table.repl); | 164 | |
| 165 | repl = ipt_alloc_initial_table(&nat_table); | ||
| 166 | if (repl == NULL) | ||
| 167 | return -ENOMEM; | ||
| 168 | net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); | ||
| 169 | kfree(repl); | ||
| 191 | if (IS_ERR(net->ipv4.nat_table)) | 170 | if (IS_ERR(net->ipv4.nat_table)) |
| 192 | return PTR_ERR(net->ipv4.nat_table); | 171 | return PTR_ERR(net->ipv4.nat_table); |
| 193 | return 0; | 172 | return 0; |
| @@ -195,7 +174,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) | |||
| 195 | 174 | ||
| 196 | static void __net_exit nf_nat_rule_net_exit(struct net *net) | 175 | static void __net_exit nf_nat_rule_net_exit(struct net *net) |
| 197 | { | 176 | { |
| 198 | ipt_unregister_table(net->ipv4.nat_table); | 177 | ipt_unregister_table(net, net->ipv4.nat_table); |
| 199 | } | 178 | } |
| 200 | 179 | ||
| 201 | static struct pernet_operations nf_nat_rule_net_ops = { | 180 | static struct pernet_operations nf_nat_rule_net_ops = { |
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 07d61a57613c..11b538deaaec 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* SIP extension for UDP NAT alteration. | 1 | /* SIP extension for NAT alteration. |
| 2 | * | 2 | * |
| 3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> | 3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> |
| 4 | * based on RR's ip_nat_ftp.c and other modules. | 4 | * based on RR's ip_nat_ftp.c and other modules. |
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/ip.h> | 15 | #include <linux/ip.h> |
| 16 | #include <net/ip.h> | 16 | #include <net/ip.h> |
| 17 | #include <linux/udp.h> | 17 | #include <linux/udp.h> |
| 18 | #include <linux/tcp.h> | ||
| 18 | 19 | ||
| 19 | #include <net/netfilter/nf_nat.h> | 20 | #include <net/netfilter/nf_nat.h> |
| 20 | #include <net/netfilter/nf_nat_helper.h> | 21 | #include <net/netfilter/nf_nat_helper.h> |
| @@ -29,25 +30,42 @@ MODULE_DESCRIPTION("SIP NAT helper"); | |||
| 29 | MODULE_ALIAS("ip_nat_sip"); | 30 | MODULE_ALIAS("ip_nat_sip"); |
| 30 | 31 | ||
| 31 | 32 | ||
| 32 | static unsigned int mangle_packet(struct sk_buff *skb, | 33 | static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, |
| 33 | const char **dptr, unsigned int *datalen, | 34 | const char **dptr, unsigned int *datalen, |
| 34 | unsigned int matchoff, unsigned int matchlen, | 35 | unsigned int matchoff, unsigned int matchlen, |
| 35 | const char *buffer, unsigned int buflen) | 36 | const char *buffer, unsigned int buflen) |
| 36 | { | 37 | { |
| 37 | enum ip_conntrack_info ctinfo; | 38 | enum ip_conntrack_info ctinfo; |
| 38 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | 39 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
| 39 | 40 | struct tcphdr *th; | |
| 40 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, | 41 | unsigned int baseoff; |
| 41 | buffer, buflen)) | 42 | |
| 42 | return 0; | 43 | if (nf_ct_protonum(ct) == IPPROTO_TCP) { |
| 44 | th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); | ||
| 45 | baseoff = ip_hdrlen(skb) + th->doff * 4; | ||
| 46 | matchoff += dataoff - baseoff; | ||
| 47 | |||
| 48 | if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | ||
| 49 | matchoff, matchlen, | ||
| 50 | buffer, buflen, false)) | ||
| 51 | return 0; | ||
| 52 | } else { | ||
| 53 | baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); | ||
| 54 | matchoff += dataoff - baseoff; | ||
| 55 | |||
| 56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | ||
| 57 | matchoff, matchlen, | ||
| 58 | buffer, buflen)) | ||
| 59 | return 0; | ||
| 60 | } | ||
| 43 | 61 | ||
| 44 | /* Reload data pointer and adjust datalen value */ | 62 | /* Reload data pointer and adjust datalen value */ |
| 45 | *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); | 63 | *dptr = skb->data + dataoff; |
| 46 | *datalen += buflen - matchlen; | 64 | *datalen += buflen - matchlen; |
| 47 | return 1; | 65 | return 1; |
| 48 | } | 66 | } |
| 49 | 67 | ||
| 50 | static int map_addr(struct sk_buff *skb, | 68 | static int map_addr(struct sk_buff *skb, unsigned int dataoff, |
| 51 | const char **dptr, unsigned int *datalen, | 69 | const char **dptr, unsigned int *datalen, |
| 52 | unsigned int matchoff, unsigned int matchlen, | 70 | unsigned int matchoff, unsigned int matchlen, |
| 53 | union nf_inet_addr *addr, __be16 port) | 71 | union nf_inet_addr *addr, __be16 port) |
| @@ -76,11 +94,11 @@ static int map_addr(struct sk_buff *skb, | |||
| 76 | 94 | ||
| 77 | buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); | 95 | buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); |
| 78 | 96 | ||
| 79 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 97 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 80 | buffer, buflen); | 98 | buffer, buflen); |
| 81 | } | 99 | } |
| 82 | 100 | ||
| 83 | static int map_sip_addr(struct sk_buff *skb, | 101 | static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, |
| 84 | const char **dptr, unsigned int *datalen, | 102 | const char **dptr, unsigned int *datalen, |
| 85 | enum sip_header_types type) | 103 | enum sip_header_types type) |
| 86 | { | 104 | { |
| @@ -93,16 +111,18 @@ static int map_sip_addr(struct sk_buff *skb, | |||
| 93 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, | 111 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, |
| 94 | &matchoff, &matchlen, &addr, &port) <= 0) | 112 | &matchoff, &matchlen, &addr, &port) <= 0) |
| 95 | return 1; | 113 | return 1; |
| 96 | return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); | 114 | return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 115 | &addr, port); | ||
| 97 | } | 116 | } |
| 98 | 117 | ||
| 99 | static unsigned int ip_nat_sip(struct sk_buff *skb, | 118 | static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, |
| 100 | const char **dptr, unsigned int *datalen) | 119 | const char **dptr, unsigned int *datalen) |
| 101 | { | 120 | { |
| 102 | enum ip_conntrack_info ctinfo; | 121 | enum ip_conntrack_info ctinfo; |
| 103 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | 122 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
| 104 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 123 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
| 105 | unsigned int dataoff, matchoff, matchlen; | 124 | unsigned int coff, matchoff, matchlen; |
| 125 | enum sip_header_types hdr; | ||
| 106 | union nf_inet_addr addr; | 126 | union nf_inet_addr addr; |
| 107 | __be16 port; | 127 | __be16 port; |
| 108 | int request, in_header; | 128 | int request, in_header; |
| @@ -112,16 +132,21 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
| 112 | if (ct_sip_parse_request(ct, *dptr, *datalen, | 132 | if (ct_sip_parse_request(ct, *dptr, *datalen, |
| 113 | &matchoff, &matchlen, | 133 | &matchoff, &matchlen, |
| 114 | &addr, &port) > 0 && | 134 | &addr, &port) > 0 && |
| 115 | !map_addr(skb, dptr, datalen, matchoff, matchlen, | 135 | !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 116 | &addr, port)) | 136 | &addr, port)) |
| 117 | return NF_DROP; | 137 | return NF_DROP; |
| 118 | request = 1; | 138 | request = 1; |
| 119 | } else | 139 | } else |
| 120 | request = 0; | 140 | request = 0; |
| 121 | 141 | ||
| 142 | if (nf_ct_protonum(ct) == IPPROTO_TCP) | ||
| 143 | hdr = SIP_HDR_VIA_TCP; | ||
| 144 | else | ||
| 145 | hdr = SIP_HDR_VIA_UDP; | ||
| 146 | |||
| 122 | /* Translate topmost Via header and parameters */ | 147 | /* Translate topmost Via header and parameters */ |
| 123 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, | 148 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, |
| 124 | SIP_HDR_VIA, NULL, &matchoff, &matchlen, | 149 | hdr, NULL, &matchoff, &matchlen, |
| 125 | &addr, &port) > 0) { | 150 | &addr, &port) > 0) { |
| 126 | unsigned int matchend, poff, plen, buflen, n; | 151 | unsigned int matchend, poff, plen, buflen, n; |
| 127 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | 152 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
| @@ -138,7 +163,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
| 138 | goto next; | 163 | goto next; |
| 139 | } | 164 | } |
| 140 | 165 | ||
| 141 | if (!map_addr(skb, dptr, datalen, matchoff, matchlen, | 166 | if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 142 | &addr, port)) | 167 | &addr, port)) |
| 143 | return NF_DROP; | 168 | return NF_DROP; |
| 144 | 169 | ||
| @@ -153,8 +178,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
| 153 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { | 178 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { |
| 154 | buflen = sprintf(buffer, "%pI4", | 179 | buflen = sprintf(buffer, "%pI4", |
| 155 | &ct->tuplehash[!dir].tuple.dst.u3.ip); | 180 | &ct->tuplehash[!dir].tuple.dst.u3.ip); |
| 156 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 181 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
| 157 | buffer, buflen)) | 182 | poff, plen, buffer, buflen)) |
| 158 | return NF_DROP; | 183 | return NF_DROP; |
| 159 | } | 184 | } |
| 160 | 185 | ||
| @@ -167,8 +192,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
| 167 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { | 192 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { |
| 168 | buflen = sprintf(buffer, "%pI4", | 193 | buflen = sprintf(buffer, "%pI4", |
| 169 | &ct->tuplehash[!dir].tuple.src.u3.ip); | 194 | &ct->tuplehash[!dir].tuple.src.u3.ip); |
| 170 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 195 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
| 171 | buffer, buflen)) | 196 | poff, plen, buffer, buflen)) |
| 172 | return NF_DROP; | 197 | return NF_DROP; |
| 173 | } | 198 | } |
| 174 | 199 | ||
| @@ -181,31 +206,45 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
| 181 | htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { | 206 | htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { |
| 182 | __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; | 207 | __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; |
| 183 | buflen = sprintf(buffer, "%u", ntohs(p)); | 208 | buflen = sprintf(buffer, "%u", ntohs(p)); |
| 184 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 209 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
| 185 | buffer, buflen)) | 210 | poff, plen, buffer, buflen)) |
| 186 | return NF_DROP; | 211 | return NF_DROP; |
| 187 | } | 212 | } |
| 188 | } | 213 | } |
| 189 | 214 | ||
| 190 | next: | 215 | next: |
| 191 | /* Translate Contact headers */ | 216 | /* Translate Contact headers */ |
| 192 | dataoff = 0; | 217 | coff = 0; |
| 193 | in_header = 0; | 218 | in_header = 0; |
| 194 | while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, | 219 | while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, |
| 195 | SIP_HDR_CONTACT, &in_header, | 220 | SIP_HDR_CONTACT, &in_header, |
| 196 | &matchoff, &matchlen, | 221 | &matchoff, &matchlen, |
| 197 | &addr, &port) > 0) { | 222 | &addr, &port) > 0) { |
| 198 | if (!map_addr(skb, dptr, datalen, matchoff, matchlen, | 223 | if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 199 | &addr, port)) | 224 | &addr, port)) |
| 200 | return NF_DROP; | 225 | return NF_DROP; |
| 201 | } | 226 | } |
| 202 | 227 | ||
| 203 | if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || | 228 | if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || |
| 204 | !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) | 229 | !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) |
| 205 | return NF_DROP; | 230 | return NF_DROP; |
| 231 | |||
| 206 | return NF_ACCEPT; | 232 | return NF_ACCEPT; |
| 207 | } | 233 | } |
| 208 | 234 | ||
| 235 | static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) | ||
| 236 | { | ||
| 237 | enum ip_conntrack_info ctinfo; | ||
| 238 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
| 239 | const struct tcphdr *th; | ||
| 240 | |||
| 241 | if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) | ||
| 242 | return; | ||
| 243 | |||
| 244 | th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); | ||
| 245 | nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); | ||
| 246 | } | ||
| 247 | |||
| 209 | /* Handles expected signalling connections and media streams */ | 248 | /* Handles expected signalling connections and media streams */ |
| 210 | static void ip_nat_sip_expected(struct nf_conn *ct, | 249 | static void ip_nat_sip_expected(struct nf_conn *ct, |
| 211 | struct nf_conntrack_expect *exp) | 250 | struct nf_conntrack_expect *exp) |
| @@ -232,7 +271,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct, | |||
| 232 | } | 271 | } |
| 233 | } | 272 | } |
| 234 | 273 | ||
| 235 | static unsigned int ip_nat_sip_expect(struct sk_buff *skb, | 274 | static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, |
| 236 | const char **dptr, unsigned int *datalen, | 275 | const char **dptr, unsigned int *datalen, |
| 237 | struct nf_conntrack_expect *exp, | 276 | struct nf_conntrack_expect *exp, |
| 238 | unsigned int matchoff, | 277 | unsigned int matchoff, |
| @@ -279,8 +318,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, | |||
| 279 | if (exp->tuple.dst.u3.ip != exp->saved_ip || | 318 | if (exp->tuple.dst.u3.ip != exp->saved_ip || |
| 280 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { | 319 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { |
| 281 | buflen = sprintf(buffer, "%pI4:%u", &newip, port); | 320 | buflen = sprintf(buffer, "%pI4:%u", &newip, port); |
| 282 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 321 | if (!mangle_packet(skb, dataoff, dptr, datalen, |
| 283 | buffer, buflen)) | 322 | matchoff, matchlen, buffer, buflen)) |
| 284 | goto err; | 323 | goto err; |
| 285 | } | 324 | } |
| 286 | return NF_ACCEPT; | 325 | return NF_ACCEPT; |
| @@ -290,7 +329,7 @@ err: | |||
| 290 | return NF_DROP; | 329 | return NF_DROP; |
| 291 | } | 330 | } |
| 292 | 331 | ||
| 293 | static int mangle_content_len(struct sk_buff *skb, | 332 | static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, |
| 294 | const char **dptr, unsigned int *datalen) | 333 | const char **dptr, unsigned int *datalen) |
| 295 | { | 334 | { |
| 296 | enum ip_conntrack_info ctinfo; | 335 | enum ip_conntrack_info ctinfo; |
| @@ -312,12 +351,13 @@ static int mangle_content_len(struct sk_buff *skb, | |||
| 312 | return 0; | 351 | return 0; |
| 313 | 352 | ||
| 314 | buflen = sprintf(buffer, "%u", c_len); | 353 | buflen = sprintf(buffer, "%u", c_len); |
| 315 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 354 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 316 | buffer, buflen); | 355 | buffer, buflen); |
| 317 | } | 356 | } |
| 318 | 357 | ||
| 319 | static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | 358 | static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, |
| 320 | unsigned int dataoff, unsigned int *datalen, | 359 | const char **dptr, unsigned int *datalen, |
| 360 | unsigned int sdpoff, | ||
| 321 | enum sdp_header_types type, | 361 | enum sdp_header_types type, |
| 322 | enum sdp_header_types term, | 362 | enum sdp_header_types term, |
| 323 | char *buffer, int buflen) | 363 | char *buffer, int buflen) |
| @@ -326,16 +366,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | |||
| 326 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | 366 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
| 327 | unsigned int matchlen, matchoff; | 367 | unsigned int matchlen, matchoff; |
| 328 | 368 | ||
| 329 | if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, | 369 | if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, |
| 330 | &matchoff, &matchlen) <= 0) | 370 | &matchoff, &matchlen) <= 0) |
| 331 | return -ENOENT; | 371 | return -ENOENT; |
| 332 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 372 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 333 | buffer, buflen) ? 0 : -EINVAL; | 373 | buffer, buflen) ? 0 : -EINVAL; |
| 334 | } | 374 | } |
| 335 | 375 | ||
| 336 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | 376 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, |
| 337 | unsigned int dataoff, | 377 | const char **dptr, unsigned int *datalen, |
| 338 | unsigned int *datalen, | 378 | unsigned int sdpoff, |
| 339 | enum sdp_header_types type, | 379 | enum sdp_header_types type, |
| 340 | enum sdp_header_types term, | 380 | enum sdp_header_types term, |
| 341 | const union nf_inet_addr *addr) | 381 | const union nf_inet_addr *addr) |
| @@ -344,16 +384,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | |||
| 344 | unsigned int buflen; | 384 | unsigned int buflen; |
| 345 | 385 | ||
| 346 | buflen = sprintf(buffer, "%pI4", &addr->ip); | 386 | buflen = sprintf(buffer, "%pI4", &addr->ip); |
| 347 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, | 387 | if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, |
| 348 | buffer, buflen)) | 388 | buffer, buflen)) |
| 349 | return 0; | 389 | return 0; |
| 350 | 390 | ||
| 351 | return mangle_content_len(skb, dptr, datalen); | 391 | return mangle_content_len(skb, dataoff, dptr, datalen); |
| 352 | } | 392 | } |
| 353 | 393 | ||
| 354 | static unsigned int ip_nat_sdp_port(struct sk_buff *skb, | 394 | static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, |
| 355 | const char **dptr, | 395 | const char **dptr, unsigned int *datalen, |
| 356 | unsigned int *datalen, | ||
| 357 | unsigned int matchoff, | 396 | unsigned int matchoff, |
| 358 | unsigned int matchlen, | 397 | unsigned int matchlen, |
| 359 | u_int16_t port) | 398 | u_int16_t port) |
| @@ -362,16 +401,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, | |||
| 362 | unsigned int buflen; | 401 | unsigned int buflen; |
| 363 | 402 | ||
| 364 | buflen = sprintf(buffer, "%u", port); | 403 | buflen = sprintf(buffer, "%u", port); |
| 365 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 404 | if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 366 | buffer, buflen)) | 405 | buffer, buflen)) |
| 367 | return 0; | 406 | return 0; |
| 368 | 407 | ||
| 369 | return mangle_content_len(skb, dptr, datalen); | 408 | return mangle_content_len(skb, dataoff, dptr, datalen); |
| 370 | } | 409 | } |
| 371 | 410 | ||
| 372 | static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | 411 | static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, |
| 373 | unsigned int dataoff, | 412 | const char **dptr, unsigned int *datalen, |
| 374 | unsigned int *datalen, | 413 | unsigned int sdpoff, |
| 375 | const union nf_inet_addr *addr) | 414 | const union nf_inet_addr *addr) |
| 376 | { | 415 | { |
| 377 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | 416 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; |
| @@ -379,12 +418,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | |||
| 379 | 418 | ||
| 380 | /* Mangle session description owner and contact addresses */ | 419 | /* Mangle session description owner and contact addresses */ |
| 381 | buflen = sprintf(buffer, "%pI4", &addr->ip); | 420 | buflen = sprintf(buffer, "%pI4", &addr->ip); |
| 382 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, | 421 | if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, |
| 383 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, | 422 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, |
| 384 | buffer, buflen)) | 423 | buffer, buflen)) |
| 385 | return 0; | 424 | return 0; |
| 386 | 425 | ||
| 387 | switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, | 426 | switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, |
| 388 | SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, | 427 | SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, |
| 389 | buffer, buflen)) { | 428 | buffer, buflen)) { |
| 390 | case 0: | 429 | case 0: |
| @@ -401,14 +440,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | |||
| 401 | return 0; | 440 | return 0; |
| 402 | } | 441 | } |
| 403 | 442 | ||
| 404 | return mangle_content_len(skb, dptr, datalen); | 443 | return mangle_content_len(skb, dataoff, dptr, datalen); |
| 405 | } | 444 | } |
| 406 | 445 | ||
| 407 | /* So, this packet has hit the connection tracking matching code. | 446 | /* So, this packet has hit the connection tracking matching code. |
| 408 | Mangle it, and change the expectation to match the new version. */ | 447 | Mangle it, and change the expectation to match the new version. */ |
| 409 | static unsigned int ip_nat_sdp_media(struct sk_buff *skb, | 448 | static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, |
| 410 | const char **dptr, | 449 | const char **dptr, unsigned int *datalen, |
| 411 | unsigned int *datalen, | ||
| 412 | struct nf_conntrack_expect *rtp_exp, | 450 | struct nf_conntrack_expect *rtp_exp, |
| 413 | struct nf_conntrack_expect *rtcp_exp, | 451 | struct nf_conntrack_expect *rtcp_exp, |
| 414 | unsigned int mediaoff, | 452 | unsigned int mediaoff, |
| @@ -456,7 +494,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, | |||
| 456 | 494 | ||
| 457 | /* Update media port. */ | 495 | /* Update media port. */ |
| 458 | if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && | 496 | if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && |
| 459 | !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) | 497 | !ip_nat_sdp_port(skb, dataoff, dptr, datalen, |
| 498 | mediaoff, medialen, port)) | ||
| 460 | goto err2; | 499 | goto err2; |
| 461 | 500 | ||
| 462 | return NF_ACCEPT; | 501 | return NF_ACCEPT; |
| @@ -471,6 +510,7 @@ err1: | |||
| 471 | static void __exit nf_nat_sip_fini(void) | 510 | static void __exit nf_nat_sip_fini(void) |
| 472 | { | 511 | { |
| 473 | rcu_assign_pointer(nf_nat_sip_hook, NULL); | 512 | rcu_assign_pointer(nf_nat_sip_hook, NULL); |
| 513 | rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); | ||
| 474 | rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); | 514 | rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); |
| 475 | rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); | 515 | rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); |
| 476 | rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); | 516 | rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); |
| @@ -482,12 +522,14 @@ static void __exit nf_nat_sip_fini(void) | |||
| 482 | static int __init nf_nat_sip_init(void) | 522 | static int __init nf_nat_sip_init(void) |
| 483 | { | 523 | { |
| 484 | BUG_ON(nf_nat_sip_hook != NULL); | 524 | BUG_ON(nf_nat_sip_hook != NULL); |
| 525 | BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); | ||
| 485 | BUG_ON(nf_nat_sip_expect_hook != NULL); | 526 | BUG_ON(nf_nat_sip_expect_hook != NULL); |
| 486 | BUG_ON(nf_nat_sdp_addr_hook != NULL); | 527 | BUG_ON(nf_nat_sdp_addr_hook != NULL); |
| 487 | BUG_ON(nf_nat_sdp_port_hook != NULL); | 528 | BUG_ON(nf_nat_sdp_port_hook != NULL); |
| 488 | BUG_ON(nf_nat_sdp_session_hook != NULL); | 529 | BUG_ON(nf_nat_sdp_session_hook != NULL); |
| 489 | BUG_ON(nf_nat_sdp_media_hook != NULL); | 530 | BUG_ON(nf_nat_sdp_media_hook != NULL); |
| 490 | rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); | 531 | rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); |
| 532 | rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); | ||
| 491 | rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); | 533 | rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); |
| 492 | rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); | 534 | rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); |
| 493 | rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); | 535 | rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); |
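
Note: the dataoff parameter threaded through every SIP callback exists because a TCP segment can carry more than one SIP message: dataoff marks where the current message starts, while the manglers want offsets relative to the transport payload, so mangle_packet() translates with matchoff += dataoff - baseoff. A toy calculation with made-up header sizes and offsets:

        #include <stdio.h>

        /* Offset bookkeeping as in mangle_packet(), with sample numbers:
         * baseoff is where the transport payload starts (IP header plus
         * TCP header), dataoff is where this SIP message starts, and
         * matchoff is relative to the SIP message. */
        int main(void)
        {
                unsigned int ip_hdrlen = 20, tcp_doff = 5;
                unsigned int baseoff = ip_hdrlen + tcp_doff * 4;  /* 40 */
                unsigned int dataoff = 40 + 120; /* 2nd message, 120 in */
                unsigned int matchoff = 7;       /* inside that message */

                /* The mangler expects an offset from the payload start: */
                matchoff += dataoff - baseoff;
                printf("payload-relative matchoff = %u\n", matchoff); /* 127 */
                return 0;
        }

For UDP the same translation applies with baseoff = ip_hdrlen + sizeof(struct udphdr).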
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index d9521f6f9ed0..ee5f419d0a56 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <linux/moduleparam.h> | 43 | #include <linux/moduleparam.h> |
| 44 | #include <linux/types.h> | 44 | #include <linux/types.h> |
| 45 | #include <linux/kernel.h> | 45 | #include <linux/kernel.h> |
| 46 | #include <linux/slab.h> | ||
| 46 | #include <linux/in.h> | 47 | #include <linux/in.h> |
| 47 | #include <linux/ip.h> | 48 | #include <linux/ip.h> |
| 48 | #include <linux/udp.h> | 49 | #include <linux/udp.h> |
| @@ -400,7 +401,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, | |||
| 400 | *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); | 401 | *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); |
| 401 | if (*octets == NULL) { | 402 | if (*octets == NULL) { |
| 402 | if (net_ratelimit()) | 403 | if (net_ratelimit()) |
| 403 | printk("OOM in bsalg (%d)\n", __LINE__); | 404 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 404 | return 0; | 405 | return 0; |
| 405 | } | 406 | } |
| 406 | 407 | ||
| @@ -451,7 +452,7 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, | |||
| 451 | *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); | 452 | *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); |
| 452 | if (*oid == NULL) { | 453 | if (*oid == NULL) { |
| 453 | if (net_ratelimit()) | 454 | if (net_ratelimit()) |
| 454 | printk("OOM in bsalg (%d)\n", __LINE__); | 455 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 455 | return 0; | 456 | return 0; |
| 456 | } | 457 | } |
| 457 | 458 | ||
| @@ -728,7 +729,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
| 728 | if (*obj == NULL) { | 729 | if (*obj == NULL) { |
| 729 | kfree(id); | 730 | kfree(id); |
| 730 | if (net_ratelimit()) | 731 | if (net_ratelimit()) |
| 731 | printk("OOM in bsalg (%d)\n", __LINE__); | 732 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 732 | return 0; | 733 | return 0; |
| 733 | } | 734 | } |
| 734 | (*obj)->syntax.l[0] = l; | 735 | (*obj)->syntax.l[0] = l; |
| @@ -745,7 +746,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
| 745 | kfree(p); | 746 | kfree(p); |
| 746 | kfree(id); | 747 | kfree(id); |
| 747 | if (net_ratelimit()) | 748 | if (net_ratelimit()) |
| 748 | printk("OOM in bsalg (%d)\n", __LINE__); | 749 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 749 | return 0; | 750 | return 0; |
| 750 | } | 751 | } |
| 751 | memcpy((*obj)->syntax.c, p, len); | 752 | memcpy((*obj)->syntax.c, p, len); |
| @@ -760,7 +761,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
| 760 | if (*obj == NULL) { | 761 | if (*obj == NULL) { |
| 761 | kfree(id); | 762 | kfree(id); |
| 762 | if (net_ratelimit()) | 763 | if (net_ratelimit()) |
| 763 | printk("OOM in bsalg (%d)\n", __LINE__); | 764 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 764 | return 0; | 765 | return 0; |
| 765 | } | 766 | } |
| 766 | if (!asn1_null_decode(ctx, end)) { | 767 | if (!asn1_null_decode(ctx, end)) { |
| @@ -781,7 +782,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
| 781 | kfree(lp); | 782 | kfree(lp); |
| 782 | kfree(id); | 783 | kfree(id); |
| 783 | if (net_ratelimit()) | 784 | if (net_ratelimit()) |
| 784 | printk("OOM in bsalg (%d)\n", __LINE__); | 785 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 785 | return 0; | 786 | return 0; |
| 786 | } | 787 | } |
| 787 | memcpy((*obj)->syntax.ul, lp, len); | 788 | memcpy((*obj)->syntax.ul, lp, len); |
| @@ -802,7 +803,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
| 802 | kfree(p); | 803 | kfree(p); |
| 803 | kfree(id); | 804 | kfree(id); |
| 804 | if (net_ratelimit()) | 805 | if (net_ratelimit()) |
| 805 | printk("OOM in bsalg (%d)\n", __LINE__); | 806 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 806 | return 0; | 807 | return 0; |
| 807 | } | 808 | } |
| 808 | memcpy((*obj)->syntax.uc, p, len); | 809 | memcpy((*obj)->syntax.uc, p, len); |
| @@ -820,7 +821,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
| 820 | if (*obj == NULL) { | 821 | if (*obj == NULL) { |
| 821 | kfree(id); | 822 | kfree(id); |
| 822 | if (net_ratelimit()) | 823 | if (net_ratelimit()) |
| 823 | printk("OOM in bsalg (%d)\n", __LINE__); | 824 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
| 824 | return 0; | 825 | return 0; |
| 825 | } | 826 | } |
| 826 | (*obj)->syntax.ul[0] = ul; | 827 | (*obj)->syntax.ul[0] = ul; |
| @@ -892,13 +893,15 @@ static void fast_csum(__sum16 *csum, | |||
| 892 | unsigned char s[4]; | 893 | unsigned char s[4]; |
| 893 | 894 | ||
| 894 | if (offset & 1) { | 895 | if (offset & 1) { |
| 895 | s[0] = s[2] = 0; | 896 | s[0] = ~0; |
| 896 | s[1] = ~*optr; | 897 | s[1] = ~*optr; |
| 898 | s[2] = 0; | ||
| 897 | s[3] = *nptr; | 899 | s[3] = *nptr; |
| 898 | } else { | 900 | } else { |
| 899 | s[1] = s[3] = 0; | ||
| 900 | s[0] = ~*optr; | 901 | s[0] = ~*optr; |
| 902 | s[1] = ~0; | ||
| 901 | s[2] = *nptr; | 903 | s[2] = *nptr; |
| 904 | s[3] = 0; | ||
| 902 | } | 905 | } |
| 903 | 906 | ||
| 904 | *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); | 907 | *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); |
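The fast_csum() hunk above is a genuine fix, not a cleanup: to splice one byte out of and into the checksum, the scratch buffer must hold the one's-complement of the whole old 16-bit word plus the new word, and the complement of a word whose other half is zero has 0xff in that half. The old zero padding made the subtraction come up short by 0xff whenever the byte-replace path ran. A conceptual user-space sketch of the same RFC 1624 (eqn. 3) update, where csum_update_byte is illustrative rather than a kernel API:

    #include <stdint.h>

    static uint16_t csum_update_byte(uint16_t check, int odd_offset,
                                     uint8_t old_byte, uint8_t new_byte)
    {
        uint32_t sum = (uint16_t)~check;        /* unfold stored checksum */
        uint16_t old_w, new_w;

        if (odd_offset) {                       /* byte is the low half of the word */
            old_w = old_byte;
            new_w = new_byte;
        } else {                                /* byte is the high half of the word */
            old_w = (uint16_t)(old_byte << 8);
            new_w = (uint16_t)(new_byte << 8);
        }
        sum += (uint16_t)~old_w;                /* one's-complement subtract: note
                                                 * ~0x00nn == 0xff(~nn), the 0xff pad */
        sum += new_w;                           /* add the replacement word */
        while (sum >> 16)                       /* fold carries back in */
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;                  /* refold */
    }

The s[0] = ~0 / s[1] = ~0 assignments in the fixed kernel code are exactly that 0xff pad, laid out in memory for csum_partial().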
| @@ -1038,7 +1041,7 @@ static int snmp_parse_mangle(unsigned char *msg, | |||
| 1038 | unsigned int cls, con, tag, vers, pdutype; | 1041 | unsigned int cls, con, tag, vers, pdutype; |
| 1039 | struct asn1_ctx ctx; | 1042 | struct asn1_ctx ctx; |
| 1040 | struct asn1_octstr comm; | 1043 | struct asn1_octstr comm; |
| 1041 | struct snmp_object **obj; | 1044 | struct snmp_object *obj; |
| 1042 | 1045 | ||
| 1043 | if (debug > 1) | 1046 | if (debug > 1) |
| 1044 | hex_dump(msg, len); | 1047 | hex_dump(msg, len); |
| @@ -1148,43 +1151,34 @@ static int snmp_parse_mangle(unsigned char *msg, | |||
| 1148 | if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) | 1151 | if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) |
| 1149 | return 0; | 1152 | return 0; |
| 1150 | 1153 | ||
| 1151 | obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); | ||
| 1152 | if (obj == NULL) { | ||
| 1153 | if (net_ratelimit()) | ||
| 1154 | printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__); | ||
| 1155 | return 0; | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | while (!asn1_eoc_decode(&ctx, eoc)) { | 1154 | while (!asn1_eoc_decode(&ctx, eoc)) { |
| 1159 | unsigned int i; | 1155 | unsigned int i; |
| 1160 | 1156 | ||
| 1161 | if (!snmp_object_decode(&ctx, obj)) { | 1157 | if (!snmp_object_decode(&ctx, &obj)) { |
| 1162 | if (*obj) { | 1158 | if (obj) { |
| 1163 | kfree((*obj)->id); | 1159 | kfree(obj->id); |
| 1164 | kfree(*obj); | 1160 | kfree(obj); |
| 1165 | } | 1161 | } |
| 1166 | kfree(obj); | ||
| 1167 | return 0; | 1162 | return 0; |
| 1168 | } | 1163 | } |
| 1169 | 1164 | ||
| 1170 | if (debug > 1) { | 1165 | if (debug > 1) { |
| 1171 | printk(KERN_DEBUG "bsalg: object: "); | 1166 | printk(KERN_DEBUG "bsalg: object: "); |
| 1172 | for (i = 0; i < (*obj)->id_len; i++) { | 1167 | for (i = 0; i < obj->id_len; i++) { |
| 1173 | if (i > 0) | 1168 | if (i > 0) |
| 1174 | printk("."); | 1169 | printk("."); |
| 1175 | printk("%lu", (*obj)->id[i]); | 1170 | printk("%lu", obj->id[i]); |
| 1176 | } | 1171 | } |
| 1177 | printk(": type=%u\n", (*obj)->type); | 1172 | printk(": type=%u\n", obj->type); |
| 1178 | 1173 | ||
| 1179 | } | 1174 | } |
| 1180 | 1175 | ||
| 1181 | if ((*obj)->type == SNMP_IPADDR) | 1176 | if (obj->type == SNMP_IPADDR) |
| 1182 | mangle_address(ctx.begin, ctx.pointer - 4 , map, check); | 1177 | mangle_address(ctx.begin, ctx.pointer - 4 , map, check); |
| 1183 | 1178 | ||
| 1184 | kfree((*obj)->id); | 1179 | kfree(obj->id); |
| 1185 | kfree(*obj); | 1180 | kfree(obj); |
| 1186 | } | 1181 | } |
| 1187 | kfree(obj); | ||
| 1188 | 1182 | ||
| 1189 | if (!asn1_eoc_decode(&ctx, eoc)) | 1183 | if (!asn1_eoc_decode(&ctx, eoc)) |
| 1190 | return 0; | 1184 | return 0; |
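Besides the log-level cleanup (untagged printk() becoming pr_notice()), snmp_parse_mangle() above stops kmalloc()ing a struct snmp_object * whose only purpose was to be passed by address to the decoder; a stack pointer does the same job without the allocation, its failure path, and the trailing kfree(). Roughly the shape of the change, with condensed illustrative types:

    #include <linux/slab.h>

    struct demo_obj { unsigned long *id; };

    /* decoder allocates *out; returns 0 on failure, possibly leaving a
     * partially built object behind for the caller to free */
    static unsigned char demo_decode(struct demo_obj **out)
    {
        *out = kzalloc(sizeof(**out), GFP_ATOMIC);
        return *out != NULL;
    }

    static int demo_parse(void)
    {
        struct demo_obj *obj;   /* was: struct demo_obj **obj = kmalloc(...) */

        if (!demo_decode(&obj)) {
            if (obj) {          /* free whatever the decoder built */
                kfree(obj->id);
                kfree(obj);
            }
            return 0;
        }
        kfree(obj->id);
        kfree(obj);
        return 1;
    }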
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5678e9562c15..95481fee8bdb 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | */ | 7 | */ |
| 8 | #include <linux/types.h> | 8 | #include <linux/types.h> |
| 9 | #include <linux/icmp.h> | 9 | #include <linux/icmp.h> |
| 10 | #include <linux/gfp.h> | ||
| 10 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
| 11 | #include <linux/netfilter.h> | 12 | #include <linux/netfilter.h> |
| 12 | #include <linux/netfilter_ipv4.h> | 13 | #include <linux/netfilter_ipv4.h> |
| @@ -97,7 +98,7 @@ nf_nat_fn(unsigned int hooknum, | |||
| 97 | return NF_ACCEPT; | 98 | return NF_ACCEPT; |
| 98 | 99 | ||
| 99 | /* Don't try to NAT if this packet is not conntracked */ | 100 | /* Don't try to NAT if this packet is not conntracked */ |
| 100 | if (ct == &nf_conntrack_untracked) | 101 | if (nf_ct_is_untracked(ct)) |
| 101 | return NF_ACCEPT; | 102 | return NF_ACCEPT; |
| 102 | 103 | ||
| 103 | nat = nfct_nat(ct); | 104 | nat = nfct_nat(ct); |
| @@ -130,16 +131,9 @@ nf_nat_fn(unsigned int hooknum, | |||
| 130 | if (!nf_nat_initialized(ct, maniptype)) { | 131 | if (!nf_nat_initialized(ct, maniptype)) { |
| 131 | unsigned int ret; | 132 | unsigned int ret; |
| 132 | 133 | ||
| 133 | if (hooknum == NF_INET_LOCAL_IN) | 134 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); |
| 134 | /* LOCAL_IN hook doesn't have a chain! */ | 135 | if (ret != NF_ACCEPT) |
| 135 | ret = alloc_null_binding(ct, hooknum); | ||
| 136 | else | ||
| 137 | ret = nf_nat_rule_find(skb, hooknum, in, out, | ||
| 138 | ct); | ||
| 139 | |||
| 140 | if (ret != NF_ACCEPT) { | ||
| 141 | return ret; | 136 | return ret; |
| 142 | } | ||
| 143 | } else | 137 | } else |
| 144 | pr_debug("Already setup manip %s for ct %p\n", | 138 | pr_debug("Already setup manip %s for ct %p\n", |
| 145 | maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", | 139 | maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", |
| @@ -293,12 +287,12 @@ static int __init nf_nat_standalone_init(void) | |||
| 293 | #endif | 287 | #endif |
| 294 | ret = nf_nat_rule_init(); | 288 | ret = nf_nat_rule_init(); |
| 295 | if (ret < 0) { | 289 | if (ret < 0) { |
| 296 | printk("nf_nat_init: can't setup rules.\n"); | 290 | pr_err("nf_nat_init: can't setup rules.\n"); |
| 297 | goto cleanup_decode_session; | 291 | goto cleanup_decode_session; |
| 298 | } | 292 | } |
| 299 | ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); | 293 | ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); |
| 300 | if (ret < 0) { | 294 | if (ret < 0) { |
| 301 | printk("nf_nat_init: can't register hooks.\n"); | 295 | pr_err("nf_nat_init: can't register hooks.\n"); |
| 302 | goto cleanup_rule_init; | 296 | goto cleanup_rule_init; |
| 303 | } | 297 | } |
| 304 | return ret; | 298 | return ret; |
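Two separate simplifications land in nf_nat_standalone.c above: the LOCAL_IN special case disappears because nf_nat_rule_find() now falls back to a null binding itself, and the untracked test moves from comparing against the address of one global conntrack instance to the nf_ct_is_untracked() predicate. The predicate form is what lets untracked conntracks stop being a single shared object; roughly, mirroring the helper of that era as a sketch:

    #include <linux/bitops.h>
    #include <net/netfilter/nf_conntrack.h>

    static inline bool demo_ct_is_untracked(const struct nf_conn *ct)
    {
        /* a status bit marks untracked entries, so the test no longer
         * depends on any particular instance's address */
        return test_bit(IPS_UNTRACKED_BIT, &ct->status);
    }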
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index b096e81500ae..7274a43c7a12 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 9 | #include <linux/moduleparam.h> | ||
| 10 | #include <linux/udp.h> | 9 | #include <linux/udp.h> |
| 11 | 10 | ||
| 12 | #include <net/netfilter/nf_nat_helper.h> | 11 | #include <net/netfilter/nf_nat_helper.h> |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f25542c48b7d..4ae1f203f7cb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
| @@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { | |||
| 127 | SNMP_MIB_SENTINEL | 127 | SNMP_MIB_SENTINEL |
| 128 | }; | 128 | }; |
| 129 | 129 | ||
| 130 | static struct { | 130 | static const struct { |
| 131 | char *name; | 131 | const char *name; |
| 132 | int index; | 132 | int index; |
| 133 | } icmpmibmap[] = { | 133 | } icmpmibmap[] = { |
| 134 | { "DestUnreachs", ICMP_DEST_UNREACH }, | 134 | { "DestUnreachs", ICMP_DEST_UNREACH }, |
| @@ -249,6 +249,10 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
| 249 | SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), | 249 | SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), |
| 250 | SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), | 250 | SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), |
| 251 | SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), | 251 | SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), |
| 252 | SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), | ||
| 253 | SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), | ||
| 254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), | ||
| 255 | SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), | ||
| 252 | SNMP_MIB_SENTINEL | 256 | SNMP_MIB_SENTINEL |
| 253 | }; | 257 | }; |
| 254 | 258 | ||
| @@ -280,7 +284,7 @@ static void icmpmsg_put(struct seq_file *seq) | |||
| 280 | 284 | ||
| 281 | count = 0; | 285 | count = 0; |
| 282 | for (i = 0; i < ICMPMSG_MIB_MAX; i++) { | 286 | for (i = 0; i < ICMPMSG_MIB_MAX; i++) { |
| 283 | val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); | 287 | val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); |
| 284 | if (val) { | 288 | if (val) { |
| 285 | type[count] = i; | 289 | type[count] = i; |
| 286 | vals[count++] = val; | 290 | vals[count++] = val; |
| @@ -307,18 +311,18 @@ static void icmp_put(struct seq_file *seq) | |||
| 307 | for (i=0; icmpmibmap[i].name != NULL; i++) | 311 | for (i=0; icmpmibmap[i].name != NULL; i++) |
| 308 | seq_printf(seq, " Out%s", icmpmibmap[i].name); | 312 | seq_printf(seq, " Out%s", icmpmibmap[i].name); |
| 309 | seq_printf(seq, "\nIcmp: %lu %lu", | 313 | seq_printf(seq, "\nIcmp: %lu %lu", |
| 310 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), | 314 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), |
| 311 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); | 315 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); |
| 312 | for (i=0; icmpmibmap[i].name != NULL; i++) | 316 | for (i=0; icmpmibmap[i].name != NULL; i++) |
| 313 | seq_printf(seq, " %lu", | 317 | seq_printf(seq, " %lu", |
| 314 | snmp_fold_field((void **) net->mib.icmpmsg_statistics, | 318 | snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, |
| 315 | icmpmibmap[i].index)); | 319 | icmpmibmap[i].index)); |
| 316 | seq_printf(seq, " %lu %lu", | 320 | seq_printf(seq, " %lu %lu", |
| 317 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), | 321 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), |
| 318 | snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); | 322 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); |
| 319 | for (i=0; icmpmibmap[i].name != NULL; i++) | 323 | for (i=0; icmpmibmap[i].name != NULL; i++) |
| 320 | seq_printf(seq, " %lu", | 324 | seq_printf(seq, " %lu", |
| 321 | snmp_fold_field((void **) net->mib.icmpmsg_statistics, | 325 | snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, |
| 322 | icmpmibmap[i].index | 0x100)); | 326 | icmpmibmap[i].index | 0x100)); |
| 323 | } | 327 | } |
| 324 | 328 | ||
| @@ -339,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 339 | IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, | 343 | IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, |
| 340 | sysctl_ip_default_ttl); | 344 | sysctl_ip_default_ttl); |
| 341 | 345 | ||
| 346 | BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); | ||
| 342 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | 347 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) |
| 343 | seq_printf(seq, " %lu", | 348 | seq_printf(seq, " %llu", |
| 344 | snmp_fold_field((void **)net->mib.ip_statistics, | 349 | snmp_fold_field64((void __percpu **)net->mib.ip_statistics, |
| 345 | snmp4_ipstats_list[i].entry)); | 350 | snmp4_ipstats_list[i].entry, |
| 351 | offsetof(struct ipstats_mib, syncp))); | ||
| 346 | 352 | ||
| 347 | icmp_put(seq); /* RFC 2011 compatibility */ | 353 | icmp_put(seq); /* RFC 2011 compatibility */ |
| 348 | icmpmsg_put(seq); | 354 | icmpmsg_put(seq); |
| @@ -356,11 +362,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 356 | /* MaxConn field is signed, RFC 2012 */ | 362 | /* MaxConn field is signed, RFC 2012 */ |
| 357 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) | 363 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) |
| 358 | seq_printf(seq, " %ld", | 364 | seq_printf(seq, " %ld", |
| 359 | snmp_fold_field((void **)net->mib.tcp_statistics, | 365 | snmp_fold_field((void __percpu **)net->mib.tcp_statistics, |
| 360 | snmp4_tcp_list[i].entry)); | 366 | snmp4_tcp_list[i].entry)); |
| 361 | else | 367 | else |
| 362 | seq_printf(seq, " %lu", | 368 | seq_printf(seq, " %lu", |
| 363 | snmp_fold_field((void **)net->mib.tcp_statistics, | 369 | snmp_fold_field((void __percpu **)net->mib.tcp_statistics, |
| 364 | snmp4_tcp_list[i].entry)); | 370 | snmp4_tcp_list[i].entry)); |
| 365 | } | 371 | } |
| 366 | 372 | ||
| @@ -371,7 +377,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 371 | seq_puts(seq, "\nUdp:"); | 377 | seq_puts(seq, "\nUdp:"); |
| 372 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 378 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
| 373 | seq_printf(seq, " %lu", | 379 | seq_printf(seq, " %lu", |
| 374 | snmp_fold_field((void **)net->mib.udp_statistics, | 380 | snmp_fold_field((void __percpu **)net->mib.udp_statistics, |
| 375 | snmp4_udp_list[i].entry)); | 381 | snmp4_udp_list[i].entry)); |
| 376 | 382 | ||
| 377 | /* the UDP and UDP-Lite MIBs are the same */ | 383 | /* the UDP and UDP-Lite MIBs are the same */ |
| @@ -382,7 +388,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 382 | seq_puts(seq, "\nUdpLite:"); | 388 | seq_puts(seq, "\nUdpLite:"); |
| 383 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 389 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
| 384 | seq_printf(seq, " %lu", | 390 | seq_printf(seq, " %lu", |
| 385 | snmp_fold_field((void **)net->mib.udplite_statistics, | 391 | snmp_fold_field((void __percpu **)net->mib.udplite_statistics, |
| 386 | snmp4_udp_list[i].entry)); | 392 | snmp4_udp_list[i].entry)); |
| 387 | 393 | ||
| 388 | seq_putc(seq, '\n'); | 394 | seq_putc(seq, '\n'); |
| @@ -419,7 +425,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
| 419 | seq_puts(seq, "\nTcpExt:"); | 425 | seq_puts(seq, "\nTcpExt:"); |
| 420 | for (i = 0; snmp4_net_list[i].name != NULL; i++) | 426 | for (i = 0; snmp4_net_list[i].name != NULL; i++) |
| 421 | seq_printf(seq, " %lu", | 427 | seq_printf(seq, " %lu", |
| 422 | snmp_fold_field((void **)net->mib.net_statistics, | 428 | snmp_fold_field((void __percpu **)net->mib.net_statistics, |
| 423 | snmp4_net_list[i].entry)); | 429 | snmp4_net_list[i].entry)); |
| 424 | 430 | ||
| 425 | seq_puts(seq, "\nIpExt:"); | 431 | seq_puts(seq, "\nIpExt:"); |
| @@ -428,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
| 428 | 434 | ||
| 429 | seq_puts(seq, "\nIpExt:"); | 435 | seq_puts(seq, "\nIpExt:"); |
| 430 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) | 436 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) |
| 431 | seq_printf(seq, " %lu", | 437 | seq_printf(seq, " %llu", |
| 432 | snmp_fold_field((void **)net->mib.ip_statistics, | 438 | snmp_fold_field64((void __percpu **)net->mib.ip_statistics, |
| 433 | snmp4_ipextstats_list[i].entry)); | 439 | snmp4_ipextstats_list[i].entry, |
| 440 | offsetof(struct ipstats_mib, syncp))); | ||
| 434 | 441 | ||
| 435 | seq_putc(seq, '\n'); | 442 | seq_putc(seq, '\n'); |
| 436 | return 0; | 443 | return 0; |
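The format strings above switch from %lu to %llu because the IP MIBs are now folded with snmp_fold_field64(), which returns a u64 summed across CPUs; the extra offsetof(struct ipstats_mib, syncp) argument tells it where each per-cpu block keeps the u64_stats_sync that makes 64-bit reads tear-free on 32-bit machines, and the new BUILD_BUG_ON pins mibs at offset zero so the per-cpu base pointer can double as the counter array. A sketch of the read side such folding relies on (demo_mib and demo_read are illustrative):

    #include <linux/types.h>
    #include <linux/u64_stats_sync.h>

    struct demo_mib {
        u64 mibs[8];
        struct u64_stats_sync syncp;
    };

    static u64 demo_read(const struct demo_mib *m, int idx)
    {
        unsigned int start;
        u64 v;

        do {
            start = u64_stats_fetch_begin(&m->syncp);
            v = m->mibs[idx];           /* may be mid-update on 32-bit */
        } while (u64_stats_fetch_retry(&m->syncp, start));
        return v;                       /* consistent snapshot either way */
    }

On 64-bit hosts the seqcount compiles away to nothing, so the common case pays no cost for the 32-bit correctness.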
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 542f22fc98b3..f2d297351405 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c | |||
| @@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) | |||
| 52 | 52 | ||
| 53 | return ret; | 53 | return ret; |
| 54 | } | 54 | } |
| 55 | EXPORT_SYMBOL(inet_add_protocol); | ||
| 55 | 56 | ||
| 56 | /* | 57 | /* |
| 57 | * Remove a protocol from the hash tables. | 58 | * Remove a protocol from the hash tables. |
| @@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) | |||
| 76 | 77 | ||
| 77 | return ret; | 78 | return ret; |
| 78 | } | 79 | } |
| 79 | |||
| 80 | EXPORT_SYMBOL(inet_add_protocol); | ||
| 81 | EXPORT_SYMBOL(inet_del_protocol); | 80 | EXPORT_SYMBOL(inet_del_protocol); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ce154b47f1da..009a7b2aa1ef 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
| @@ -60,7 +60,6 @@ | |||
| 60 | #include <net/net_namespace.h> | 60 | #include <net/net_namespace.h> |
| 61 | #include <net/dst.h> | 61 | #include <net/dst.h> |
| 62 | #include <net/sock.h> | 62 | #include <net/sock.h> |
| 63 | #include <linux/gfp.h> | ||
| 64 | #include <linux/ip.h> | 63 | #include <linux/ip.h> |
| 65 | #include <linux/net.h> | 64 | #include <linux/net.h> |
| 66 | #include <net/ip.h> | 65 | #include <net/ip.h> |
| @@ -291,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) | |||
| 291 | { | 290 | { |
| 292 | /* Charge it to the socket. */ | 291 | /* Charge it to the socket. */ |
| 293 | 292 | ||
| 294 | if (sock_queue_rcv_skb(sk, skb) < 0) { | 293 | if (ip_queue_rcv_skb(sk, skb) < 0) { |
| 295 | kfree_skb(skb); | 294 | kfree_skb(skb); |
| 296 | return NET_RX_DROP; | 295 | return NET_RX_DROP; |
| 297 | } | 296 | } |
| @@ -315,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 315 | } | 314 | } |
| 316 | 315 | ||
| 317 | static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | 316 | static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, |
| 318 | struct rtable *rt, | 317 | struct rtable **rtp, |
| 319 | unsigned int flags) | 318 | unsigned int flags) |
| 320 | { | 319 | { |
| 321 | struct inet_sock *inet = inet_sk(sk); | 320 | struct inet_sock *inet = inet_sk(sk); |
| @@ -324,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
| 324 | struct sk_buff *skb; | 323 | struct sk_buff *skb; |
| 325 | unsigned int iphlen; | 324 | unsigned int iphlen; |
| 326 | int err; | 325 | int err; |
| 326 | struct rtable *rt = *rtp; | ||
| 327 | 327 | ||
| 328 | if (length > rt->u.dst.dev->mtu) { | 328 | if (length > rt->dst.dev->mtu) { |
| 329 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 329 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
| 330 | rt->u.dst.dev->mtu); | 330 | rt->dst.dev->mtu); |
| 331 | return -EMSGSIZE; | 331 | return -EMSGSIZE; |
| 332 | } | 332 | } |
| 333 | if (flags&MSG_PROBE) | 333 | if (flags&MSG_PROBE) |
| 334 | goto out; | 334 | goto out; |
| 335 | 335 | ||
| 336 | skb = sock_alloc_send_skb(sk, | 336 | skb = sock_alloc_send_skb(sk, |
| 337 | length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, | 337 | length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, |
| 338 | flags & MSG_DONTWAIT, &err); | 338 | flags & MSG_DONTWAIT, &err); |
| 339 | if (skb == NULL) | 339 | if (skb == NULL) |
| 340 | goto error; | 340 | goto error; |
| 341 | skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); | 341 | skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); |
| 342 | 342 | ||
| 343 | skb->priority = sk->sk_priority; | 343 | skb->priority = sk->sk_priority; |
| 344 | skb->mark = sk->sk_mark; | 344 | skb->mark = sk->sk_mark; |
| 345 | skb_dst_set(skb, dst_clone(&rt->u.dst)); | 345 | skb_dst_set(skb, &rt->dst); |
| 346 | *rtp = NULL; | ||
| 346 | 347 | ||
| 347 | skb_reset_network_header(skb); | 348 | skb_reset_network_header(skb); |
| 348 | iph = ip_hdr(skb); | 349 | iph = ip_hdr(skb); |
| @@ -374,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
| 374 | iph->check = 0; | 375 | iph->check = 0; |
| 375 | iph->tot_len = htons(length); | 376 | iph->tot_len = htons(length); |
| 376 | if (!iph->id) | 377 | if (!iph->id) |
| 377 | ip_select_ident(iph, &rt->u.dst, NULL); | 378 | ip_select_ident(iph, &rt->dst, NULL); |
| 378 | 379 | ||
| 379 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | 380 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); |
| 380 | } | 381 | } |
| @@ -382,8 +383,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
| 382 | icmp_out_count(net, ((struct icmphdr *) | 383 | icmp_out_count(net, ((struct icmphdr *) |
| 383 | skb_transport_header(skb))->type); | 384 | skb_transport_header(skb))->type); |
| 384 | 385 | ||
| 385 | err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, | 386 | err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, |
| 386 | dst_output); | 387 | rt->dst.dev, dst_output); |
| 387 | if (err > 0) | 388 | if (err > 0) |
| 388 | err = net_xmit_errno(err); | 389 | err = net_xmit_errno(err); |
| 389 | if (err) | 390 | if (err) |
| @@ -577,7 +578,7 @@ back_from_confirm: | |||
| 577 | 578 | ||
| 578 | if (inet->hdrincl) | 579 | if (inet->hdrincl) |
| 579 | err = raw_send_hdrinc(sk, msg->msg_iov, len, | 580 | err = raw_send_hdrinc(sk, msg->msg_iov, len, |
| 580 | rt, msg->msg_flags); | 581 | &rt, msg->msg_flags); |
| 581 | 582 | ||
| 582 | else { | 583 | else { |
| 583 | if (!ipc.addr) | 584 | if (!ipc.addr) |
| @@ -605,7 +606,7 @@ out: | |||
| 605 | return len; | 606 | return len; |
| 606 | 607 | ||
| 607 | do_confirm: | 608 | do_confirm: |
| 608 | dst_confirm(&rt->u.dst); | 609 | dst_confirm(&rt->dst); |
| 609 | if (!(msg->msg_flags & MSG_PROBE) || len) | 610 | if (!(msg->msg_flags & MSG_PROBE) || len) |
| 610 | goto back_from_confirm; | 611 | goto back_from_confirm; |
| 611 | err = 0; | 612 | err = 0; |
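raw_send_hdrinc() now takes struct rtable **rtp and steals the caller's route reference: skb_dst_set() consumes &rt->dst without the former dst_clone(), and *rtp = NULL keeps the caller's cleanup path from dropping the same reference twice. That removes one atomic operation per sent packet. The ownership contract, as a sketch with demo names and most error handling elided:

    #include <linux/skbuff.h>
    #include <net/dst.h>
    #include <net/route.h>

    static int demo_send(struct sock *sk, struct rtable **rtp)
    {
        struct rtable *rt = *rtp;
        struct sk_buff *skb = alloc_skb(LL_MAX_HEADER + 128, GFP_KERNEL);

        if (!skb)
            return -ENOBUFS;            /* caller still owns *rtp */

        skb_dst_set(skb, &rt->dst);     /* reference moves to the skb */
        *rtp = NULL;                    /* caller must not ip_rt_put() it now */
        /* ... build headers and transmit; the skb owns the dst ... */
        kfree_skb(skb);                 /* demo only: this releases the dst too */
        return 0;
    }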
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d62b05d33384..ac6559cb54f9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -90,6 +90,7 @@ | |||
| 90 | #include <linux/jhash.h> | 90 | #include <linux/jhash.h> |
| 91 | #include <linux/rcupdate.h> | 91 | #include <linux/rcupdate.h> |
| 92 | #include <linux/times.h> | 92 | #include <linux/times.h> |
| 93 | #include <linux/slab.h> | ||
| 93 | #include <net/dst.h> | 94 | #include <net/dst.h> |
| 94 | #include <net/net_namespace.h> | 95 | #include <net/net_namespace.h> |
| 95 | #include <net/protocol.h> | 96 | #include <net/protocol.h> |
| @@ -128,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; | |||
| 128 | static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | 129 | static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; |
| 129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 130 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
| 130 | static int ip_rt_min_advmss __read_mostly = 256; | 131 | static int ip_rt_min_advmss __read_mostly = 256; |
| 131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; | ||
| 132 | static int rt_chain_length_max __read_mostly = 20; | 132 | static int rt_chain_length_max __read_mostly = 20; |
| 133 | 133 | ||
| 134 | static struct delayed_work expires_work; | 134 | static struct delayed_work expires_work; |
| @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | |||
| 146 | static void ipv4_link_failure(struct sk_buff *skb); | 146 | static void ipv4_link_failure(struct sk_buff *skb); |
| 147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
| 148 | static int rt_garbage_collect(struct dst_ops *ops); | 148 | static int rt_garbage_collect(struct dst_ops *ops); |
| 149 | static void rt_emergency_hash_rebuild(struct net *net); | ||
| 150 | 149 | ||
| 151 | 150 | ||
| 152 | static struct dst_ops ipv4_dst_ops = { | 151 | static struct dst_ops ipv4_dst_ops = { |
| @@ -254,14 +253,12 @@ static unsigned rt_hash_mask __read_mostly; | |||
| 254 | static unsigned int rt_hash_log __read_mostly; | 253 | static unsigned int rt_hash_log __read_mostly; |
| 255 | 254 | ||
| 256 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 255 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
| 257 | #define RT_CACHE_STAT_INC(field) \ | 256 | #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) |
| 258 | (__raw_get_cpu_var(rt_cache_stat).field++) | ||
| 259 | 257 | ||
| 260 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, | 258 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, |
| 261 | int genid) | 259 | int genid) |
| 262 | { | 260 | { |
| 263 | return jhash_3words((__force u32)(__be32)(daddr), | 261 | return jhash_3words((__force u32)daddr, (__force u32)saddr, |
| 264 | (__force u32)(__be32)(saddr), | ||
| 265 | idx, genid) | 262 | idx, genid) |
| 266 | & rt_hash_mask; | 263 | & rt_hash_mask; |
| 267 | } | 264 | } |
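The RT_CACHE_STAT_INC rewrite above replaces an open-coded per-cpu increment — __raw_get_cpu_var(...).field++, an address computation followed by a non-atomic read-modify-write — with __this_cpu_inc(), which x86 can emit as a single segment-relative instruction. The double-underscore variant is deliberate: these are statistics counters where a rare lost update on architectures without such an instruction is acceptable. The idiom, with demo names:

    #include <linux/percpu.h>

    struct demo_stat { unsigned long hits; };
    static DEFINE_PER_CPU(struct demo_stat, demo_stats);

    static void demo_hit(void)
    {
        __this_cpu_inc(demo_stats.hits);    /* one insn on x86; no explicit
                                             * per-cpu address computation */
    }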
| @@ -287,12 +284,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
| 287 | if (!rt_hash_table[st->bucket].chain) | 284 | if (!rt_hash_table[st->bucket].chain) |
| 288 | continue; | 285 | continue; |
| 289 | rcu_read_lock_bh(); | 286 | rcu_read_lock_bh(); |
| 290 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 287 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); |
| 291 | while (r) { | 288 | while (r) { |
| 292 | if (dev_net(r->u.dst.dev) == seq_file_net(seq) && | 289 | if (dev_net(r->dst.dev) == seq_file_net(seq) && |
| 293 | r->rt_genid == st->genid) | 290 | r->rt_genid == st->genid) |
| 294 | return r; | 291 | return r; |
| 295 | r = rcu_dereference(r->u.dst.rt_next); | 292 | r = rcu_dereference_bh(r->dst.rt_next); |
| 296 | } | 293 | } |
| 297 | rcu_read_unlock_bh(); | 294 | rcu_read_unlock_bh(); |
| 298 | } | 295 | } |
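rcu_dereference() becomes rcu_dereference_bh() above because this walk runs under rcu_read_lock_bh(); with CONFIG_PROVE_RCU the two flavours are tracked separately, so the plain form would trip a false lockdep warning even though the code was always safe. The pairing, sketched with an illustrative helper:

    static int demo_chain_count(struct rtable **chainp)
    {
        struct rtable *r;
        int n = 0;

        rcu_read_lock_bh();
        for (r = rcu_dereference_bh(*chainp); r;
             r = rcu_dereference_bh(r->dst.rt_next))
            n++;                /* chain may mutate; the count is a snapshot */
        rcu_read_unlock_bh();
        return n;
    }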
| @@ -304,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
| 304 | { | 301 | { |
| 305 | struct rt_cache_iter_state *st = seq->private; | 302 | struct rt_cache_iter_state *st = seq->private; |
| 306 | 303 | ||
| 307 | r = r->u.dst.rt_next; | 304 | r = r->dst.rt_next; |
| 308 | while (!r) { | 305 | while (!r) { |
| 309 | rcu_read_unlock_bh(); | 306 | rcu_read_unlock_bh(); |
| 310 | do { | 307 | do { |
| @@ -314,7 +311,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
| 314 | rcu_read_lock_bh(); | 311 | rcu_read_lock_bh(); |
| 315 | r = rt_hash_table[st->bucket].chain; | 312 | r = rt_hash_table[st->bucket].chain; |
| 316 | } | 313 | } |
| 317 | return rcu_dereference(r); | 314 | return rcu_dereference_bh(r); |
| 318 | } | 315 | } |
| 319 | 316 | ||
| 320 | static struct rtable *rt_cache_get_next(struct seq_file *seq, | 317 | static struct rtable *rt_cache_get_next(struct seq_file *seq, |
| @@ -322,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, | |||
| 322 | { | 319 | { |
| 323 | struct rt_cache_iter_state *st = seq->private; | 320 | struct rt_cache_iter_state *st = seq->private; |
| 324 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { | 321 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { |
| 325 | if (dev_net(r->u.dst.dev) != seq_file_net(seq)) | 322 | if (dev_net(r->dst.dev) != seq_file_net(seq)) |
| 326 | continue; | 323 | continue; |
| 327 | if (r->rt_genid == st->genid) | 324 | if (r->rt_genid == st->genid) |
| 328 | break; | 325 | break; |
| @@ -378,20 +375,21 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
| 378 | struct rtable *r = v; | 375 | struct rtable *r = v; |
| 379 | int len; | 376 | int len; |
| 380 | 377 | ||
| 381 | seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" | 378 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" |
| 382 | "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", | 379 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", |
| 383 | r->u.dst.dev ? r->u.dst.dev->name : "*", | 380 | r->dst.dev ? r->dst.dev->name : "*", |
| 384 | (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, | 381 | (__force u32)r->rt_dst, |
| 385 | r->rt_flags, atomic_read(&r->u.dst.__refcnt), | 382 | (__force u32)r->rt_gateway, |
| 386 | r->u.dst.__use, 0, (unsigned long)r->rt_src, | 383 | r->rt_flags, atomic_read(&r->dst.__refcnt), |
| 387 | (dst_metric(&r->u.dst, RTAX_ADVMSS) ? | 384 | r->dst.__use, 0, (__force u32)r->rt_src, |
| 388 | (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), | 385 | (dst_metric(&r->dst, RTAX_ADVMSS) ? |
| 389 | dst_metric(&r->u.dst, RTAX_WINDOW), | 386 | (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), |
| 390 | (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + | 387 | dst_metric(&r->dst, RTAX_WINDOW), |
| 391 | dst_metric(&r->u.dst, RTAX_RTTVAR)), | 388 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
| 389 | dst_metric(&r->dst, RTAX_RTTVAR)), | ||
| 392 | r->fl.fl4_tos, | 390 | r->fl.fl4_tos, |
| 393 | r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, | 391 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, |
| 394 | r->u.dst.hh ? (r->u.dst.hh->hh_output == | 392 | r->dst.hh ? (r->dst.hh->hh_output == |
| 395 | dev_queue_xmit) : 0, | 393 | dev_queue_xmit) : 0, |
| 396 | r->rt_spec_dst, &len); | 394 | r->rt_spec_dst, &len); |
| 397 | 395 | ||
| @@ -610,13 +608,13 @@ static inline int ip_rt_proc_init(void) | |||
| 610 | 608 | ||
| 611 | static inline void rt_free(struct rtable *rt) | 609 | static inline void rt_free(struct rtable *rt) |
| 612 | { | 610 | { |
| 613 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 611 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); |
| 614 | } | 612 | } |
| 615 | 613 | ||
| 616 | static inline void rt_drop(struct rtable *rt) | 614 | static inline void rt_drop(struct rtable *rt) |
| 617 | { | 615 | { |
| 618 | ip_rt_put(rt); | 616 | ip_rt_put(rt); |
| 619 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 617 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); |
| 620 | } | 618 | } |
| 621 | 619 | ||
| 622 | static inline int rt_fast_clean(struct rtable *rth) | 620 | static inline int rt_fast_clean(struct rtable *rth) |
| @@ -624,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
| 624 | /* Kill broadcast/multicast entries very aggresively, if they | 622 | /* Kill broadcast/multicast entries very aggresively, if they |
| 625 | collide in hash table with more useful entries */ | 623 | collide in hash table with more useful entries */ |
| 626 | return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && | 624 | return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && |
| 627 | rth->fl.iif && rth->u.dst.rt_next; | 625 | rth->fl.iif && rth->dst.rt_next; |
| 628 | } | 626 | } |
| 629 | 627 | ||
| 630 | static inline int rt_valuable(struct rtable *rth) | 628 | static inline int rt_valuable(struct rtable *rth) |
| 631 | { | 629 | { |
| 632 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 630 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
| 633 | rth->u.dst.expires; | 631 | rth->dst.expires; |
| 634 | } | 632 | } |
| 635 | 633 | ||
| 636 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 634 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
| @@ -638,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t | |||
| 638 | unsigned long age; | 636 | unsigned long age; |
| 639 | int ret = 0; | 637 | int ret = 0; |
| 640 | 638 | ||
| 641 | if (atomic_read(&rth->u.dst.__refcnt)) | 639 | if (atomic_read(&rth->dst.__refcnt)) |
| 642 | goto out; | 640 | goto out; |
| 643 | 641 | ||
| 644 | ret = 1; | 642 | ret = 1; |
| 645 | if (rth->u.dst.expires && | 643 | if (rth->dst.expires && |
| 646 | time_after_eq(jiffies, rth->u.dst.expires)) | 644 | time_after_eq(jiffies, rth->dst.expires)) |
| 647 | goto out; | 645 | goto out; |
| 648 | 646 | ||
| 649 | age = jiffies - rth->u.dst.lastuse; | 647 | age = jiffies - rth->dst.lastuse; |
| 650 | ret = 0; | 648 | ret = 0; |
| 651 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 649 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
| 652 | (age <= tmo2 && rt_valuable(rth))) | 650 | (age <= tmo2 && rt_valuable(rth))) |
| @@ -662,7 +660,7 @@ out: return ret; | |||
| 662 | */ | 660 | */ |
| 663 | static inline u32 rt_score(struct rtable *rt) | 661 | static inline u32 rt_score(struct rtable *rt) |
| 664 | { | 662 | { |
| 665 | u32 score = jiffies - rt->u.dst.lastuse; | 663 | u32 score = jiffies - rt->dst.lastuse; |
| 666 | 664 | ||
| 667 | score = ~score & ~(3<<30); | 665 | score = ~score & ~(3<<30); |
| 668 | 666 | ||
| @@ -685,30 +683,29 @@ static inline bool rt_caching(const struct net *net) | |||
| 685 | static inline bool compare_hash_inputs(const struct flowi *fl1, | 683 | static inline bool compare_hash_inputs(const struct flowi *fl1, |
| 686 | const struct flowi *fl2) | 684 | const struct flowi *fl2) |
| 687 | { | 685 | { |
| 688 | return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | 686 | return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | |
| 689 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | | 687 | ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | |
| 690 | (fl1->iif ^ fl2->iif)) == 0); | 688 | (fl1->iif ^ fl2->iif)) == 0); |
| 691 | } | 689 | } |
| 692 | 690 | ||
| 693 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 691 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) |
| 694 | { | 692 | { |
| 695 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | 693 | return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | |
| 696 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | | 694 | ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | |
| 697 | (fl1->mark ^ fl2->mark) | | 695 | (fl1->mark ^ fl2->mark) | |
| 698 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ | 696 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | |
| 699 | *(u16 *)&fl2->nl_u.ip4_u.tos) | | ||
| 700 | (fl1->oif ^ fl2->oif) | | 697 | (fl1->oif ^ fl2->oif) | |
| 701 | (fl1->iif ^ fl2->iif)) == 0; | 698 | (fl1->iif ^ fl2->iif)) == 0; |
| 702 | } | 699 | } |
| 703 | 700 | ||
| 704 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 701 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
| 705 | { | 702 | { |
| 706 | return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); | 703 | return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); |
| 707 | } | 704 | } |
| 708 | 705 | ||
| 709 | static inline int rt_is_expired(struct rtable *rth) | 706 | static inline int rt_is_expired(struct rtable *rth) |
| 710 | { | 707 | { |
| 711 | return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); | 708 | return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); |
| 712 | } | 709 | } |
| 713 | 710 | ||
| 714 | /* | 711 | /* |
| @@ -737,7 +734,7 @@ static void rt_do_flush(int process_context) | |||
| 737 | rth = rt_hash_table[i].chain; | 734 | rth = rt_hash_table[i].chain; |
| 738 | 735 | ||
| 739 | /* defer releasing the head of the list after spin_unlock */ | 736 | /* defer releasing the head of the list after spin_unlock */ |
| 740 | for (tail = rth; tail; tail = tail->u.dst.rt_next) | 737 | for (tail = rth; tail; tail = tail->dst.rt_next) |
| 741 | if (!rt_is_expired(tail)) | 738 | if (!rt_is_expired(tail)) |
| 742 | break; | 739 | break; |
| 743 | if (rth != tail) | 740 | if (rth != tail) |
| @@ -746,9 +743,9 @@ static void rt_do_flush(int process_context) | |||
| 746 | /* call rt_free on entries after the tail requiring flush */ | 743 | /* call rt_free on entries after the tail requiring flush */ |
| 747 | prev = &rt_hash_table[i].chain; | 744 | prev = &rt_hash_table[i].chain; |
| 748 | for (p = *prev; p; p = next) { | 745 | for (p = *prev; p; p = next) { |
| 749 | next = p->u.dst.rt_next; | 746 | next = p->dst.rt_next; |
| 750 | if (!rt_is_expired(p)) { | 747 | if (!rt_is_expired(p)) { |
| 751 | prev = &p->u.dst.rt_next; | 748 | prev = &p->dst.rt_next; |
| 752 | } else { | 749 | } else { |
| 753 | *prev = next; | 750 | *prev = next; |
| 754 | rt_free(p); | 751 | rt_free(p); |
| @@ -763,7 +760,7 @@ static void rt_do_flush(int process_context) | |||
| 763 | spin_unlock_bh(rt_hash_lock_addr(i)); | 760 | spin_unlock_bh(rt_hash_lock_addr(i)); |
| 764 | 761 | ||
| 765 | for (; rth != tail; rth = next) { | 762 | for (; rth != tail; rth = next) { |
| 766 | next = rth->u.dst.rt_next; | 763 | next = rth->dst.rt_next; |
| 767 | rt_free(rth); | 764 | rt_free(rth); |
| 768 | } | 765 | } |
| 769 | } | 766 | } |
| @@ -780,11 +777,30 @@ static void rt_do_flush(int process_context) | |||
| 780 | #define FRACT_BITS 3 | 777 | #define FRACT_BITS 3 |
| 781 | #define ONE (1UL << FRACT_BITS) | 778 | #define ONE (1UL << FRACT_BITS) |
| 782 | 779 | ||
| 780 | /* | ||
| 781 | * Given a hash chain and an item in this hash chain, | ||
| 782 | * find if a previous entry has the same hash_inputs | ||
| 783 | * (but differs on tos, mark or oif) | ||
| 784 | * Returns 0 if an alias is found. | ||
| 785 | * Returns ONE if rth has no alias before itself. | ||
| 786 | */ | ||
| 787 | static int has_noalias(const struct rtable *head, const struct rtable *rth) | ||
| 788 | { | ||
| 789 | const struct rtable *aux = head; | ||
| 790 | |||
| 791 | while (aux != rth) { | ||
| 792 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
| 793 | return 0; | ||
| 794 | aux = aux->dst.rt_next; | ||
| 795 | } | ||
| 796 | return ONE; | ||
| 797 | } | ||
| 798 | |||
| 783 | static void rt_check_expire(void) | 799 | static void rt_check_expire(void) |
| 784 | { | 800 | { |
| 785 | static unsigned int rover; | 801 | static unsigned int rover; |
| 786 | unsigned int i = rover, goal; | 802 | unsigned int i = rover, goal; |
| 787 | struct rtable *rth, *aux, **rthp; | 803 | struct rtable *rth, **rthp; |
| 788 | unsigned long samples = 0; | 804 | unsigned long samples = 0; |
| 789 | unsigned long sum = 0, sum2 = 0; | 805 | unsigned long sum = 0, sum2 = 0; |
| 790 | unsigned long delta; | 806 | unsigned long delta; |
| @@ -815,18 +831,18 @@ static void rt_check_expire(void) | |||
| 815 | length = 0; | 831 | length = 0; |
| 816 | spin_lock_bh(rt_hash_lock_addr(i)); | 832 | spin_lock_bh(rt_hash_lock_addr(i)); |
| 817 | while ((rth = *rthp) != NULL) { | 833 | while ((rth = *rthp) != NULL) { |
| 818 | prefetch(rth->u.dst.rt_next); | 834 | prefetch(rth->dst.rt_next); |
| 819 | if (rt_is_expired(rth)) { | 835 | if (rt_is_expired(rth)) { |
| 820 | *rthp = rth->u.dst.rt_next; | 836 | *rthp = rth->dst.rt_next; |
| 821 | rt_free(rth); | 837 | rt_free(rth); |
| 822 | continue; | 838 | continue; |
| 823 | } | 839 | } |
| 824 | if (rth->u.dst.expires) { | 840 | if (rth->dst.expires) { |
| 825 | /* Entry is expired even if it is in use */ | 841 | /* Entry is expired even if it is in use */ |
| 826 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 842 | if (time_before_eq(jiffies, rth->dst.expires)) { |
| 827 | nofree: | 843 | nofree: |
| 828 | tmo >>= 1; | 844 | tmo >>= 1; |
| 829 | rthp = &rth->u.dst.rt_next; | 845 | rthp = &rth->dst.rt_next; |
| 830 | /* | 846 | /* |
| 831 | * We only count entries on | 847 | * We only count entries on |
| 832 | * a chain with equal hash inputs once | 848 | * a chain with equal hash inputs once |
| @@ -835,22 +851,14 @@ nofree: | |||
| 835 | * attributes don't unfairly skew | 851 | * attributes don't unfairly skew |
| 836 | * the length computation | 852 | * the length computation |
| 837 | */ | 853 | */ |
| 838 | for (aux = rt_hash_table[i].chain;;) { | 854 | length += has_noalias(rt_hash_table[i].chain, rth); |
| 839 | if (aux == rth) { | ||
| 840 | length += ONE; | ||
| 841 | break; | ||
| 842 | } | ||
| 843 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
| 844 | break; | ||
| 845 | aux = aux->u.dst.rt_next; | ||
| 846 | } | ||
| 847 | continue; | 855 | continue; |
| 848 | } | 856 | } |
| 849 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | 857 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) |
| 850 | goto nofree; | 858 | goto nofree; |
| 851 | 859 | ||
| 852 | /* Cleanup aged off entries. */ | 860 | /* Cleanup aged off entries. */ |
| 853 | *rthp = rth->u.dst.rt_next; | 861 | *rthp = rth->dst.rt_next; |
| 854 | rt_free(rth); | 862 | rt_free(rth); |
| 855 | } | 863 | } |
| 856 | spin_unlock_bh(rt_hash_lock_addr(i)); | 864 | spin_unlock_bh(rt_hash_lock_addr(i)); |
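has_noalias() above factors out the alias walk that rt_check_expire() previously open-coded, and keeps the existing fixed-point convention: each entry with distinct hash inputs contributes ONE = 1 << FRACT_BITS, so accumulated chain lengths carry three fractional bits until averaged or shifted down (slow_chain_length(), added further below, ends with length >> FRACT_BITS). A worked example of the convention, illustrative only:

    #define DEMO_FRACT_BITS 3
    #define DEMO_ONE (1UL << DEMO_FRACT_BITS)

    /* 5 distinct entries spread over 2 buckets:
     * sum = 5 * DEMO_ONE = 40; average = 40 / 2 = 20, i.e. 2.5 in Q3.
     * Shifting right by DEMO_FRACT_BITS truncates back to whole entries. */
    static unsigned long demo_whole_entries(unsigned long fixed)
    {
        return fixed >> DEMO_FRACT_BITS;
    }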
| @@ -908,34 +916,11 @@ void rt_cache_flush_batch(void) | |||
| 908 | rt_do_flush(!in_softirq()); | 916 | rt_do_flush(!in_softirq()); |
| 909 | } | 917 | } |
| 910 | 918 | ||
| 911 | /* | ||
| 912 | * We change rt_genid and let gc do the cleanup | ||
| 913 | */ | ||
| 914 | static void rt_secret_rebuild(unsigned long __net) | ||
| 915 | { | ||
| 916 | struct net *net = (struct net *)__net; | ||
| 917 | rt_cache_invalidate(net); | ||
| 918 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); | ||
| 919 | } | ||
| 920 | |||
| 921 | static void rt_secret_rebuild_oneshot(struct net *net) | ||
| 922 | { | ||
| 923 | del_timer_sync(&net->ipv4.rt_secret_timer); | ||
| 924 | rt_cache_invalidate(net); | ||
| 925 | if (ip_rt_secret_interval) { | ||
| 926 | net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; | ||
| 927 | add_timer(&net->ipv4.rt_secret_timer); | ||
| 928 | } | ||
| 929 | } | ||
| 930 | |||
| 931 | static void rt_emergency_hash_rebuild(struct net *net) | 919 | static void rt_emergency_hash_rebuild(struct net *net) |
| 932 | { | 920 | { |
| 933 | if (net_ratelimit()) { | 921 | if (net_ratelimit()) |
| 934 | printk(KERN_WARNING "Route hash chain too long!\n"); | 922 | printk(KERN_WARNING "Route hash chain too long!\n"); |
| 935 | printk(KERN_WARNING "Adjust your secret_interval!\n"); | 923 | rt_cache_invalidate(net); |
| 936 | } | ||
| 937 | |||
| 938 | rt_secret_rebuild_oneshot(net); | ||
| 939 | } | 924 | } |
| 940 | 925 | ||
| 941 | /* | 926 | /* |
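With the periodic secret-rebuild timer removed above, an emergency rebuild reduces to rt_cache_invalidate(), which bumps the per-namespace generation id. Nothing is walked or freed on the spot: every cached route stamps the genid at creation, rt_is_expired() compares against the current value, and stale entries die lazily as lookups and gc encounter them. The mechanism in miniature, with demo names:

    #include <linux/atomic.h>

    static atomic_t demo_genid = ATOMIC_INIT(0);

    static void demo_invalidate(void)
    {
        atomic_inc(&demo_genid);    /* every cached entry is now stale */
    }

    static bool demo_is_expired(int entry_genid)
    {
        return entry_genid != atomic_read(&demo_genid);
    }

This is why the rebuild is cheap enough to trigger from the insert path: invalidation is O(1) regardless of cache size.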
| @@ -1014,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
| 1014 | if (!rt_is_expired(rth) && | 999 | if (!rt_is_expired(rth) && |
| 1015 | !rt_may_expire(rth, tmo, expire)) { | 1000 | !rt_may_expire(rth, tmo, expire)) { |
| 1016 | tmo >>= 1; | 1001 | tmo >>= 1; |
| 1017 | rthp = &rth->u.dst.rt_next; | 1002 | rthp = &rth->dst.rt_next; |
| 1018 | continue; | 1003 | continue; |
| 1019 | } | 1004 | } |
| 1020 | *rthp = rth->u.dst.rt_next; | 1005 | *rthp = rth->dst.rt_next; |
| 1021 | rt_free(rth); | 1006 | rt_free(rth); |
| 1022 | goal--; | 1007 | goal--; |
| 1023 | } | 1008 | } |
| @@ -1073,8 +1058,23 @@ work_done: | |||
| 1073 | out: return 0; | 1058 | out: return 0; |
| 1074 | } | 1059 | } |
| 1075 | 1060 | ||
| 1061 | /* | ||
| 1062 | * Returns number of entries in a hash chain that have different hash_inputs | ||
| 1063 | */ | ||
| 1064 | static int slow_chain_length(const struct rtable *head) | ||
| 1065 | { | ||
| 1066 | int length = 0; | ||
| 1067 | const struct rtable *rth = head; | ||
| 1068 | |||
| 1069 | while (rth) { | ||
| 1070 | length += has_noalias(head, rth); | ||
| 1071 | rth = rth->dst.rt_next; | ||
| 1072 | } | ||
| 1073 | return length >> FRACT_BITS; | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | static int rt_intern_hash(unsigned hash, struct rtable *rt, | 1076 | static int rt_intern_hash(unsigned hash, struct rtable *rt, |
| 1077 | struct rtable **rp, struct sk_buff *skb) | 1077 | struct rtable **rp, struct sk_buff *skb, int ifindex) |
| 1078 | { | 1078 | { |
| 1079 | struct rtable *rth, **rthp; | 1079 | struct rtable *rth, **rthp; |
| 1080 | unsigned long now; | 1080 | unsigned long now; |
| @@ -1090,7 +1090,7 @@ restart: | |||
| 1090 | candp = NULL; | 1090 | candp = NULL; |
| 1091 | now = jiffies; | 1091 | now = jiffies; |
| 1092 | 1092 | ||
| 1093 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1093 | if (!rt_caching(dev_net(rt->dst.dev))) { |
| 1094 | /* | 1094 | /* |
| 1095 | * If we're not caching, just tell the caller we | 1095 | * If we're not caching, just tell the caller we |
| 1096 | * were successful and don't touch the route. The | 1096 | * were successful and don't touch the route. The |
| @@ -1108,7 +1108,7 @@ restart: | |||
| 1108 | */ | 1108 | */ |
| 1109 | 1109 | ||
| 1110 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 1110 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
| 1111 | int err = arp_bind_neighbour(&rt->u.dst); | 1111 | int err = arp_bind_neighbour(&rt->dst); |
| 1112 | if (err) { | 1112 | if (err) { |
| 1113 | if (net_ratelimit()) | 1113 | if (net_ratelimit()) |
| 1114 | printk(KERN_WARNING | 1114 | printk(KERN_WARNING |
| @@ -1127,19 +1127,19 @@ restart: | |||
| 1127 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1127 | spin_lock_bh(rt_hash_lock_addr(hash)); |
| 1128 | while ((rth = *rthp) != NULL) { | 1128 | while ((rth = *rthp) != NULL) { |
| 1129 | if (rt_is_expired(rth)) { | 1129 | if (rt_is_expired(rth)) { |
| 1130 | *rthp = rth->u.dst.rt_next; | 1130 | *rthp = rth->dst.rt_next; |
| 1131 | rt_free(rth); | 1131 | rt_free(rth); |
| 1132 | continue; | 1132 | continue; |
| 1133 | } | 1133 | } |
| 1134 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { | 1134 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { |
| 1135 | /* Put it first */ | 1135 | /* Put it first */ |
| 1136 | *rthp = rth->u.dst.rt_next; | 1136 | *rthp = rth->dst.rt_next; |
| 1137 | /* | 1137 | /* |
| 1138 | * Since lookup is lockfree, the deletion | 1138 | * Since lookup is lockfree, the deletion |
| 1139 | * must be visible to another weakly ordered CPU before | 1139 | * must be visible to another weakly ordered CPU before |
| 1140 | * the insertion at the start of the hash chain. | 1140 | * the insertion at the start of the hash chain. |
| 1141 | */ | 1141 | */ |
| 1142 | rcu_assign_pointer(rth->u.dst.rt_next, | 1142 | rcu_assign_pointer(rth->dst.rt_next, |
| 1143 | rt_hash_table[hash].chain); | 1143 | rt_hash_table[hash].chain); |
| 1144 | /* | 1144 | /* |
| 1145 | * Since lookup is lockfree, the update writes | 1145 | * Since lookup is lockfree, the update writes |
| @@ -1147,18 +1147,18 @@ restart: | |||
| 1147 | */ | 1147 | */ |
| 1148 | rcu_assign_pointer(rt_hash_table[hash].chain, rth); | 1148 | rcu_assign_pointer(rt_hash_table[hash].chain, rth); |
| 1149 | 1149 | ||
| 1150 | dst_use(&rth->u.dst, now); | 1150 | dst_use(&rth->dst, now); |
| 1151 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1151 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1152 | 1152 | ||
| 1153 | rt_drop(rt); | 1153 | rt_drop(rt); |
| 1154 | if (rp) | 1154 | if (rp) |
| 1155 | *rp = rth; | 1155 | *rp = rth; |
| 1156 | else | 1156 | else |
| 1157 | skb_dst_set(skb, &rth->u.dst); | 1157 | skb_dst_set(skb, &rth->dst); |
| 1158 | return 0; | 1158 | return 0; |
| 1159 | } | 1159 | } |
| 1160 | 1160 | ||
| 1161 | if (!atomic_read(&rth->u.dst.__refcnt)) { | 1161 | if (!atomic_read(&rth->dst.__refcnt)) { |
| 1162 | u32 score = rt_score(rth); | 1162 | u32 score = rt_score(rth); |
| 1163 | 1163 | ||
| 1164 | if (score <= min_score) { | 1164 | if (score <= min_score) { |
| @@ -1170,7 +1170,7 @@ restart: | |||
| 1170 | 1170 | ||
| 1171 | chain_length++; | 1171 | chain_length++; |
| 1172 | 1172 | ||
| 1173 | rthp = &rth->u.dst.rt_next; | 1173 | rthp = &rth->dst.rt_next; |
| 1174 | } | 1174 | } |
| 1175 | 1175 | ||
| 1176 | if (cand) { | 1176 | if (cand) { |
| @@ -1181,18 +1181,24 @@ restart: | |||
| 1181 | * only 2 entries per bucket. We will see. | 1181 | * only 2 entries per bucket. We will see. |
| 1182 | */ | 1182 | */ |
| 1183 | if (chain_length > ip_rt_gc_elasticity) { | 1183 | if (chain_length > ip_rt_gc_elasticity) { |
| 1184 | *candp = cand->u.dst.rt_next; | 1184 | *candp = cand->dst.rt_next; |
| 1185 | rt_free(cand); | 1185 | rt_free(cand); |
| 1186 | } | 1186 | } |
| 1187 | } else { | 1187 | } else { |
| 1188 | if (chain_length > rt_chain_length_max) { | 1188 | if (chain_length > rt_chain_length_max && |
| 1189 | struct net *net = dev_net(rt->u.dst.dev); | 1189 | slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { |
| 1190 | struct net *net = dev_net(rt->dst.dev); | ||
| 1190 | int num = ++net->ipv4.current_rt_cache_rebuild_count; | 1191 | int num = ++net->ipv4.current_rt_cache_rebuild_count; |
| 1191 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1192 | if (!rt_caching(net)) { |
| 1192 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", | 1193 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", |
| 1193 | rt->u.dst.dev->name, num); | 1194 | rt->dst.dev->name, num); |
| 1194 | } | 1195 | } |
| 1195 | rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); | 1196 | rt_emergency_hash_rebuild(net); |
| 1197 | spin_unlock_bh(rt_hash_lock_addr(hash)); | ||
| 1198 | |||
| 1199 | hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | ||
| 1200 | ifindex, rt_genid(net)); | ||
| 1201 | goto restart; | ||
| 1196 | } | 1202 | } |
| 1197 | } | 1203 | } |
| 1198 | 1204 | ||
| @@ -1200,7 +1206,7 @@ restart: | |||
| 1200 | route or unicast forwarding path. | 1206 | route or unicast forwarding path. |
| 1201 | */ | 1207 | */ |
| 1202 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 1208 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
| 1203 | int err = arp_bind_neighbour(&rt->u.dst); | 1209 | int err = arp_bind_neighbour(&rt->dst); |
| 1204 | if (err) { | 1210 | if (err) { |
| 1205 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1211 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1206 | 1212 | ||
| @@ -1225,20 +1231,20 @@ restart: | |||
| 1225 | } | 1231 | } |
| 1226 | 1232 | ||
| 1227 | if (net_ratelimit()) | 1233 | if (net_ratelimit()) |
| 1228 | printk(KERN_WARNING "Neighbour table overflow.\n"); | 1234 | printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); |
| 1229 | rt_drop(rt); | 1235 | rt_drop(rt); |
| 1230 | return -ENOBUFS; | 1236 | return -ENOBUFS; |
| 1231 | } | 1237 | } |
| 1232 | } | 1238 | } |
| 1233 | 1239 | ||
| 1234 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | 1240 | rt->dst.rt_next = rt_hash_table[hash].chain; |
| 1235 | 1241 | ||
| 1236 | #if RT_CACHE_DEBUG >= 2 | 1242 | #if RT_CACHE_DEBUG >= 2 |
| 1237 | if (rt->u.dst.rt_next) { | 1243 | if (rt->dst.rt_next) { |
| 1238 | struct rtable *trt; | 1244 | struct rtable *trt; |
| 1239 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", | 1245 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", |
| 1240 | hash, &rt->rt_dst); | 1246 | hash, &rt->rt_dst); |
| 1241 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1247 | for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) |
| 1242 | printk(" . %pI4", &trt->rt_dst); | 1248 | printk(" . %pI4", &trt->rt_dst); |
| 1243 | printk("\n"); | 1249 | printk("\n"); |
| 1244 | } | 1250 | } |
| @@ -1256,7 +1262,7 @@ skip_hashing: | |||
| 1256 | if (rp) | 1262 | if (rp) |
| 1257 | *rp = rt; | 1263 | *rp = rt; |
| 1258 | else | 1264 | else |
| 1259 | skb_dst_set(skb, &rt->u.dst); | 1265 | skb_dst_set(skb, &rt->dst); |
| 1260 | return 0; | 1266 | return 0; |
| 1261 | } | 1267 | } |
| 1262 | 1268 | ||
| @@ -1318,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
| 1318 | 1324 | ||
| 1319 | ip_select_fb_ident(iph); | 1325 | ip_select_fb_ident(iph); |
| 1320 | } | 1326 | } |
| 1327 | EXPORT_SYMBOL(__ip_select_ident); | ||
| 1321 | 1328 | ||
| 1322 | static void rt_del(unsigned hash, struct rtable *rt) | 1329 | static void rt_del(unsigned hash, struct rtable *rt) |
| 1323 | { | 1330 | { |
| @@ -1328,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
| 1328 | ip_rt_put(rt); | 1335 | ip_rt_put(rt); |
| 1329 | while ((aux = *rthp) != NULL) { | 1336 | while ((aux = *rthp) != NULL) { |
| 1330 | if (aux == rt || rt_is_expired(aux)) { | 1337 | if (aux == rt || rt_is_expired(aux)) { |
| 1331 | *rthp = aux->u.dst.rt_next; | 1338 | *rthp = aux->dst.rt_next; |
| 1332 | rt_free(aux); | 1339 | rt_free(aux); |
| 1333 | continue; | 1340 | continue; |
| 1334 | } | 1341 | } |
| 1335 | rthp = &aux->u.dst.rt_next; | 1342 | rthp = &aux->dst.rt_next; |
| 1336 | } | 1343 | } |
| 1337 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1344 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1338 | } | 1345 | } |
| 1339 | 1346 | ||
| 1347 | /* called in rcu_read_lock() section */ | ||
| 1340 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1348 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
| 1341 | __be32 saddr, struct net_device *dev) | 1349 | __be32 saddr, struct net_device *dev) |
| 1342 | { | 1350 | { |
| 1343 | int i, k; | 1351 | int i, k; |
| 1344 | struct in_device *in_dev = in_dev_get(dev); | 1352 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 1345 | struct rtable *rth, **rthp; | 1353 | struct rtable *rth, **rthp; |
| 1346 | __be32 skeys[2] = { saddr, 0 }; | 1354 | __be32 skeys[2] = { saddr, 0 }; |
| 1347 | int ikeys[2] = { dev->ifindex, 0 }; | 1355 | int ikeys[2] = { dev->ifindex, 0 }; |
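ip_rt_redirect() is converted above to rely on its caller's RCU section: refcounted in_dev_get()/in_dev_put() become __in_dev_get_rcu(), which takes no reference, the function's own rcu_read_lock()/unlock() pair disappears, and the new "called in rcu_read_lock() section" comment documents the contract. What that annotation obliges a caller to do, sketched with demo names:

    static void demo_caller(struct net_device *dev)
    {
        struct in_device *in_dev;

        rcu_read_lock();                    /* caller-side read section */
        in_dev = __in_dev_get_rcu(dev);     /* no refcount taken */
        if (in_dev) {
            /* ... use in_dev; no in_dev_put() is needed or allowed ... */
        }
        rcu_read_unlock();                  /* in_dev unusable past here */
    }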
| @@ -1377,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1377 | 1385 | ||
| 1378 | rthp=&rt_hash_table[hash].chain; | 1386 | rthp=&rt_hash_table[hash].chain; |
| 1379 | 1387 | ||
| 1380 | rcu_read_lock(); | ||
| 1381 | while ((rth = rcu_dereference(*rthp)) != NULL) { | 1388 | while ((rth = rcu_dereference(*rthp)) != NULL) { |
| 1382 | struct rtable *rt; | 1389 | struct rtable *rt; |
| 1383 | 1390 | ||
| @@ -1386,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1386 | rth->fl.oif != ikeys[k] || | 1393 | rth->fl.oif != ikeys[k] || |
| 1387 | rth->fl.iif != 0 || | 1394 | rth->fl.iif != 0 || |
| 1388 | rt_is_expired(rth) || | 1395 | rt_is_expired(rth) || |
| 1389 | !net_eq(dev_net(rth->u.dst.dev), net)) { | 1396 | !net_eq(dev_net(rth->dst.dev), net)) { |
| 1390 | rthp = &rth->u.dst.rt_next; | 1397 | rthp = &rth->dst.rt_next; |
| 1391 | continue; | 1398 | continue; |
| 1392 | } | 1399 | } |
| 1393 | 1400 | ||
| 1394 | if (rth->rt_dst != daddr || | 1401 | if (rth->rt_dst != daddr || |
| 1395 | rth->rt_src != saddr || | 1402 | rth->rt_src != saddr || |
| 1396 | rth->u.dst.error || | 1403 | rth->dst.error || |
| 1397 | rth->rt_gateway != old_gw || | 1404 | rth->rt_gateway != old_gw || |
| 1398 | rth->u.dst.dev != dev) | 1405 | rth->dst.dev != dev) |
| 1399 | break; | 1406 | break; |
| 1400 | 1407 | ||
| 1401 | dst_hold(&rth->u.dst); | 1408 | dst_hold(&rth->dst); |
| 1402 | rcu_read_unlock(); | ||
| 1403 | 1409 | ||
| 1404 | rt = dst_alloc(&ipv4_dst_ops); | 1410 | rt = dst_alloc(&ipv4_dst_ops); |
| 1405 | if (rt == NULL) { | 1411 | if (rt == NULL) { |
| 1406 | ip_rt_put(rth); | 1412 | ip_rt_put(rth); |
| 1407 | in_dev_put(in_dev); | ||
| 1408 | return; | 1413 | return; |
| 1409 | } | 1414 | } |
| 1410 | 1415 | ||
| 1411 | /* Copy all the information. */ | 1416 | /* Copy all the information. */ |
| 1412 | *rt = *rth; | 1417 | *rt = *rth; |
| 1413 | rt->u.dst.__use = 1; | 1418 | rt->dst.__use = 1; |
| 1414 | atomic_set(&rt->u.dst.__refcnt, 1); | 1419 | atomic_set(&rt->dst.__refcnt, 1); |
| 1415 | rt->u.dst.child = NULL; | 1420 | rt->dst.child = NULL; |
| 1416 | if (rt->u.dst.dev) | 1421 | if (rt->dst.dev) |
| 1417 | dev_hold(rt->u.dst.dev); | 1422 | dev_hold(rt->dst.dev); |
| 1418 | if (rt->idev) | 1423 | if (rt->idev) |
| 1419 | in_dev_hold(rt->idev); | 1424 | in_dev_hold(rt->idev); |
| 1420 | rt->u.dst.obsolete = 0; | 1425 | rt->dst.obsolete = -1; |
| 1421 | rt->u.dst.lastuse = jiffies; | 1426 | rt->dst.lastuse = jiffies; |
| 1422 | rt->u.dst.path = &rt->u.dst; | 1427 | rt->dst.path = &rt->dst; |
| 1423 | rt->u.dst.neighbour = NULL; | 1428 | rt->dst.neighbour = NULL; |
| 1424 | rt->u.dst.hh = NULL; | 1429 | rt->dst.hh = NULL; |
| 1425 | #ifdef CONFIG_XFRM | 1430 | #ifdef CONFIG_XFRM |
| 1426 | rt->u.dst.xfrm = NULL; | 1431 | rt->dst.xfrm = NULL; |
| 1427 | #endif | 1432 | #endif |
| 1428 | rt->rt_genid = rt_genid(net); | 1433 | rt->rt_genid = rt_genid(net); |
| 1429 | rt->rt_flags |= RTCF_REDIRECTED; | 1434 | rt->rt_flags |= RTCF_REDIRECTED; |
| @@ -1432,37 +1437,35 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1432 | rt->rt_gateway = new_gw; | 1437 | rt->rt_gateway = new_gw; |
| 1433 | 1438 | ||
| 1434 | /* Redirect received -> path was valid */ | 1439 | /* Redirect received -> path was valid */ |
| 1435 | dst_confirm(&rth->u.dst); | 1440 | dst_confirm(&rth->dst); |
| 1436 | 1441 | ||
| 1437 | if (rt->peer) | 1442 | if (rt->peer) |
| 1438 | atomic_inc(&rt->peer->refcnt); | 1443 | atomic_inc(&rt->peer->refcnt); |
| 1439 | 1444 | ||
| 1440 | if (arp_bind_neighbour(&rt->u.dst) || | 1445 | if (arp_bind_neighbour(&rt->dst) || |
| 1441 | !(rt->u.dst.neighbour->nud_state & | 1446 | !(rt->dst.neighbour->nud_state & |
| 1442 | NUD_VALID)) { | 1447 | NUD_VALID)) { |
| 1443 | if (rt->u.dst.neighbour) | 1448 | if (rt->dst.neighbour) |
| 1444 | neigh_event_send(rt->u.dst.neighbour, NULL); | 1449 | neigh_event_send(rt->dst.neighbour, NULL); |
| 1445 | ip_rt_put(rth); | 1450 | ip_rt_put(rth); |
| 1446 | rt_drop(rt); | 1451 | rt_drop(rt); |
| 1447 | goto do_next; | 1452 | goto do_next; |
| 1448 | } | 1453 | } |
| 1449 | 1454 | ||
| 1450 | netevent.old = &rth->u.dst; | 1455 | netevent.old = &rth->dst; |
| 1451 | netevent.new = &rt->u.dst; | 1456 | netevent.new = &rt->dst; |
| 1452 | call_netevent_notifiers(NETEVENT_REDIRECT, | 1457 | call_netevent_notifiers(NETEVENT_REDIRECT, |
| 1453 | &netevent); | 1458 | &netevent); |
| 1454 | 1459 | ||
| 1455 | rt_del(hash, rth); | 1460 | rt_del(hash, rth); |
| 1456 | if (!rt_intern_hash(hash, rt, &rt, NULL)) | 1461 | if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) |
| 1457 | ip_rt_put(rt); | 1462 | ip_rt_put(rt); |
| 1458 | goto do_next; | 1463 | goto do_next; |
| 1459 | } | 1464 | } |
| 1460 | rcu_read_unlock(); | ||
| 1461 | do_next: | 1465 | do_next: |
| 1462 | ; | 1466 | ; |
| 1463 | } | 1467 | } |
| 1464 | } | 1468 | } |
| 1465 | in_dev_put(in_dev); | ||
| 1466 | return; | 1469 | return; |
| 1467 | 1470 | ||
| 1468 | reject_redirect: | 1471 | reject_redirect: |
| @@ -1473,7 +1476,7 @@ reject_redirect: | |||
| 1473 | &old_gw, dev->name, &new_gw, | 1476 | &old_gw, dev->name, &new_gw, |
| 1474 | &saddr, &daddr); | 1477 | &saddr, &daddr); |
| 1475 | #endif | 1478 | #endif |
| 1476 | in_dev_put(in_dev); | 1479 | ; |
| 1477 | } | 1480 | } |
| 1478 | 1481 | ||
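The hunks above drop ip_rt_redirect()'s internal rcu_read_lock()/rcu_read_unlock() pair and replace the in_dev_get()/in_dev_put() reference with __in_dev_get_rcu(), which is only safe because every caller now enters an RCU read-side section first (hence the new "called in rcu_read_lock() section" comment). A minimal sketch of that assumed caller-side contract; the wrapper name is illustrative, not part of this patch:

#include <linux/rcupdate.h>
#include <linux/netdevice.h>
#include <net/route.h>

/* Illustrative caller: icmp_redirect()-style code is expected to hold
 * the RCU read lock across the call, keeping dev's in_device alive
 * without taking a refcount.
 */
static void handle_redirect_sketch(struct net_device *dev, __be32 old_gw,
				   __be32 new_gw, __be32 daddr, __be32 saddr)
{
	rcu_read_lock();
	ip_rt_redirect(old_gw, daddr, new_gw, saddr, dev);
	rcu_read_unlock();
}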
| 1479 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | 1482 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) |
| @@ -1482,11 +1485,12 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
| 1482 | struct dst_entry *ret = dst; | 1485 | struct dst_entry *ret = dst; |
| 1483 | 1486 | ||
| 1484 | if (rt) { | 1487 | if (rt) { |
| 1485 | if (dst->obsolete) { | 1488 | if (dst->obsolete > 0) { |
| 1486 | ip_rt_put(rt); | 1489 | ip_rt_put(rt); |
| 1487 | ret = NULL; | 1490 | ret = NULL; |
| 1488 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1491 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || |
| 1489 | rt->u.dst.expires) { | 1492 | (rt->dst.expires && |
| 1493 | time_after_eq(jiffies, rt->dst.expires))) { | ||
| 1490 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1494 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, |
| 1491 | rt->fl.oif, | 1495 | rt->fl.oif, |
| 1492 | rt_genid(dev_net(dst->dev))); | 1496 | rt_genid(dev_net(dst->dev))); |
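Before this hunk, ipv4_negative_advice() threw away any cached route with a nonzero dst.expires; now it only discards entries whose deadline has actually passed, using the wraparound-safe jiffies comparison. The test in isolation, as a sketch (helper name assumed):

#include <linux/jiffies.h>

/* time_after_eq() stays correct across jiffies wraparound,
 * where a raw ">=" comparison would not. */
static inline bool dst_expiry_passed_sketch(unsigned long expires)
{
	return expires && time_after_eq(jiffies, expires);
}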
| @@ -1524,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1524 | int log_martians; | 1528 | int log_martians; |
| 1525 | 1529 | ||
| 1526 | rcu_read_lock(); | 1530 | rcu_read_lock(); |
| 1527 | in_dev = __in_dev_get_rcu(rt->u.dst.dev); | 1531 | in_dev = __in_dev_get_rcu(rt->dst.dev); |
| 1528 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { | 1532 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { |
| 1529 | rcu_read_unlock(); | 1533 | rcu_read_unlock(); |
| 1530 | return; | 1534 | return; |
| @@ -1535,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1535 | /* No redirected packets during ip_rt_redirect_silence; | 1539 | /* No redirected packets during ip_rt_redirect_silence; |
| 1536 | * reset the algorithm. | 1540 | * reset the algorithm. |
| 1537 | */ | 1541 | */ |
| 1538 | if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) | 1542 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) |
| 1539 | rt->u.dst.rate_tokens = 0; | 1543 | rt->dst.rate_tokens = 0; |
| 1540 | 1544 | ||
| 1541 | /* Too many ignored redirects; do not send anything; | 1545 | /* Too many ignored redirects; do not send anything; |
| 1542 | * set u.dst.rate_last to the last seen redirected packet. | 1546 | * set dst.rate_last to the last seen redirected packet. |
| 1543 | */ | 1547 | */ |
| 1544 | if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { | 1548 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { |
| 1545 | rt->u.dst.rate_last = jiffies; | 1549 | rt->dst.rate_last = jiffies; |
| 1546 | return; | 1550 | return; |
| 1547 | } | 1551 | } |
| 1548 | 1552 | ||
| 1549 | /* Check for load limit; set rate_last to the latest sent | 1553 | /* Check for load limit; set rate_last to the latest sent |
| 1550 | * redirect. | 1554 | * redirect. |
| 1551 | */ | 1555 | */ |
| 1552 | if (rt->u.dst.rate_tokens == 0 || | 1556 | if (rt->dst.rate_tokens == 0 || |
| 1553 | time_after(jiffies, | 1557 | time_after(jiffies, |
| 1554 | (rt->u.dst.rate_last + | 1558 | (rt->dst.rate_last + |
| 1555 | (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { | 1559 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { |
| 1556 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1560 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
| 1557 | rt->u.dst.rate_last = jiffies; | 1561 | rt->dst.rate_last = jiffies; |
| 1558 | ++rt->u.dst.rate_tokens; | 1562 | ++rt->dst.rate_tokens; |
| 1559 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1563 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
| 1560 | if (log_martians && | 1564 | if (log_martians && |
| 1561 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1565 | rt->dst.rate_tokens == ip_rt_redirect_number && |
| 1562 | net_ratelimit()) | 1566 | net_ratelimit()) |
| 1563 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1567 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
| 1564 | &rt->rt_src, rt->rt_iif, | 1568 | &rt->rt_src, rt->rt_iif, |
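The load-limit test above implements exponential backoff: after k ignored redirects, the next one is sent only once ip_rt_redirect_load << k jiffies have elapsed since rate_last, so the quiet period doubles with every unanswered redirect until the ip_rt_redirect_number cap silences them entirely. The gate by itself, as a sketch (names assumed):

#include <linux/jiffies.h>

static bool redirect_gate_sketch(unsigned long rate_last,
				 unsigned int rate_tokens,
				 unsigned long load)
{
	/* the first redirect always passes; later ones need an
	 * exponentially growing quiet period */
	return rate_tokens == 0 ||
	       time_after(jiffies, rate_last + (load << rate_tokens));
}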
| @@ -1573,7 +1577,7 @@ static int ip_error(struct sk_buff *skb) | |||
| 1573 | unsigned long now; | 1577 | unsigned long now; |
| 1574 | int code; | 1578 | int code; |
| 1575 | 1579 | ||
| 1576 | switch (rt->u.dst.error) { | 1580 | switch (rt->dst.error) { |
| 1577 | case EINVAL: | 1581 | case EINVAL: |
| 1578 | default: | 1582 | default: |
| 1579 | goto out; | 1583 | goto out; |
| @@ -1582,7 +1586,7 @@ static int ip_error(struct sk_buff *skb) | |||
| 1582 | break; | 1586 | break; |
| 1583 | case ENETUNREACH: | 1587 | case ENETUNREACH: |
| 1584 | code = ICMP_NET_UNREACH; | 1588 | code = ICMP_NET_UNREACH; |
| 1585 | IP_INC_STATS_BH(dev_net(rt->u.dst.dev), | 1589 | IP_INC_STATS_BH(dev_net(rt->dst.dev), |
| 1586 | IPSTATS_MIB_INNOROUTES); | 1590 | IPSTATS_MIB_INNOROUTES); |
| 1587 | break; | 1591 | break; |
| 1588 | case EACCES: | 1592 | case EACCES: |
| @@ -1591,12 +1595,12 @@ static int ip_error(struct sk_buff *skb) | |||
| 1591 | } | 1595 | } |
| 1592 | 1596 | ||
| 1593 | now = jiffies; | 1597 | now = jiffies; |
| 1594 | rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; | 1598 | rt->dst.rate_tokens += now - rt->dst.rate_last; |
| 1595 | if (rt->u.dst.rate_tokens > ip_rt_error_burst) | 1599 | if (rt->dst.rate_tokens > ip_rt_error_burst) |
| 1596 | rt->u.dst.rate_tokens = ip_rt_error_burst; | 1600 | rt->dst.rate_tokens = ip_rt_error_burst; |
| 1597 | rt->u.dst.rate_last = now; | 1601 | rt->dst.rate_last = now; |
| 1598 | if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { | 1602 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { |
| 1599 | rt->u.dst.rate_tokens -= ip_rt_error_cost; | 1603 | rt->dst.rate_tokens -= ip_rt_error_cost; |
| 1600 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1604 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); |
| 1601 | } | 1605 | } |
| 1602 | 1606 | ||
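ip_error() rate-limits ICMP destination-unreachable replies with a classic token bucket: the budget grows by one per elapsed jiffy, saturates at ip_rt_error_burst, and each reply spends ip_rt_error_cost. A self-contained sketch of that accounting (the struct and names here are assumptions, not kernel types):

#include <linux/jiffies.h>
#include <linux/types.h>

struct err_bucket_sketch {
	unsigned long rate_last;	/* jiffies at last refill */
	unsigned long rate_tokens;	/* accumulated budget */
};

static bool err_bucket_take_sketch(struct err_bucket_sketch *b,
				   unsigned long burst, unsigned long cost)
{
	unsigned long now = jiffies;

	b->rate_tokens += now - b->rate_last;	/* one token per jiffy */
	if (b->rate_tokens > burst)
		b->rate_tokens = burst;
	b->rate_last = now;
	if (b->rate_tokens >= cost) {
		b->rate_tokens -= cost;
		return true;			/* caller may icmp_send() */
	}
	return false;
}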
| @@ -1641,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1641 | 1645 | ||
| 1642 | rcu_read_lock(); | 1646 | rcu_read_lock(); |
| 1643 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
| 1644 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 1648 | rth = rcu_dereference(rth->dst.rt_next)) { |
| 1645 | unsigned short mtu = new_mtu; | 1649 | unsigned short mtu = new_mtu; |
| 1646 | 1650 | ||
| 1647 | if (rth->fl.fl4_dst != daddr || | 1651 | if (rth->fl.fl4_dst != daddr || |
| @@ -1650,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1650 | rth->rt_src != iph->saddr || | 1654 | rth->rt_src != iph->saddr || |
| 1651 | rth->fl.oif != ikeys[k] || | 1655 | rth->fl.oif != ikeys[k] || |
| 1652 | rth->fl.iif != 0 || | 1656 | rth->fl.iif != 0 || |
| 1653 | dst_metric_locked(&rth->u.dst, RTAX_MTU) || | 1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || |
| 1654 | !net_eq(dev_net(rth->u.dst.dev), net) || | 1658 | !net_eq(dev_net(rth->dst.dev), net) || |
| 1655 | rt_is_expired(rth)) | 1659 | rt_is_expired(rth)) |
| 1656 | continue; | 1660 | continue; |
| 1657 | 1661 | ||
| @@ -1659,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1659 | 1663 | ||
| 1660 | /* BSD 4.2 compatibility hack :-( */ | 1664 | /* BSD 4.2 compatibility hack :-( */ |
| 1661 | if (mtu == 0 && | 1665 | if (mtu == 0 && |
| 1662 | old_mtu >= dst_mtu(&rth->u.dst) && | 1666 | old_mtu >= dst_mtu(&rth->dst) && |
| 1663 | old_mtu >= 68 + (iph->ihl << 2)) | 1667 | old_mtu >= 68 + (iph->ihl << 2)) |
| 1664 | old_mtu -= iph->ihl << 2; | 1668 | old_mtu -= iph->ihl << 2; |
| 1665 | 1669 | ||
| 1666 | mtu = guess_mtu(old_mtu); | 1670 | mtu = guess_mtu(old_mtu); |
| 1667 | } | 1671 | } |
| 1668 | if (mtu <= dst_mtu(&rth->u.dst)) { | 1672 | if (mtu <= dst_mtu(&rth->dst)) { |
| 1669 | if (mtu < dst_mtu(&rth->u.dst)) { | 1673 | if (mtu < dst_mtu(&rth->dst)) { |
| 1670 | dst_confirm(&rth->u.dst); | 1674 | dst_confirm(&rth->dst); |
| 1671 | if (mtu < ip_rt_min_pmtu) { | 1675 | if (mtu < ip_rt_min_pmtu) { |
| 1672 | mtu = ip_rt_min_pmtu; | 1676 | mtu = ip_rt_min_pmtu; |
| 1673 | rth->u.dst.metrics[RTAX_LOCK-1] |= | 1677 | rth->dst.metrics[RTAX_LOCK-1] |= |
| 1674 | (1 << RTAX_MTU); | 1678 | (1 << RTAX_MTU); |
| 1675 | } | 1679 | } |
| 1676 | rth->u.dst.metrics[RTAX_MTU-1] = mtu; | 1680 | rth->dst.metrics[RTAX_MTU-1] = mtu; |
| 1677 | dst_set_expires(&rth->u.dst, | 1681 | dst_set_expires(&rth->dst, |
| 1678 | ip_rt_mtu_expires); | 1682 | ip_rt_mtu_expires); |
| 1679 | } | 1683 | } |
| 1680 | est_mtu = mtu; | 1684 | est_mtu = mtu; |
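The loop above only ever lowers a cached path MTU: a larger or equal report is ignored, anything under ip_rt_min_pmtu is clamped with the RTAX_MTU metric locked so the clamp sticks, and the learned value lapses after ip_rt_mtu_expires. The acceptance rule reduced to a sketch (helper name assumed):

static unsigned short pmtu_accept_sketch(unsigned short reported,
					 unsigned short cached,
					 unsigned short min_pmtu)
{
	if (reported >= cached)
		return cached;		/* never raise the PMTU from here */
	return reported < min_pmtu ? min_pmtu : reported;
}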
| @@ -1702,7 +1706,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
| 1702 | 1706 | ||
| 1703 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1707 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
| 1704 | { | 1708 | { |
| 1705 | return NULL; | 1709 | if (rt_is_expired((struct rtable *)dst)) |
| 1710 | return NULL; | ||
| 1711 | return dst; | ||
| 1706 | } | 1712 | } |
| 1707 | 1713 | ||
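Paired with the rth->dst.obsolete = -1 assignments elsewhere in this patch, the new ipv4_dst_check() body gives cached routes a real revalidation path: under the usual dst_check() contract, a nonzero obsolete always routes the call into ->check(), and returning NULL for a stale generation forces the holder to perform a fresh lookup. A socket-side sketch under those assumptions:

#include <net/dst.h>

static struct dst_entry *sk_dst_revalidate_sketch(struct dst_entry *dst,
						  u32 cookie)
{
	/* with dst->obsolete == -1 this always reaches ipv4_dst_check(),
	 * which returns NULL once rt_is_expired() is true */
	return dst_check(dst, cookie);
}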
| 1708 | static void ipv4_dst_destroy(struct dst_entry *dst) | 1714 | static void ipv4_dst_destroy(struct dst_entry *dst) |
| @@ -1745,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
| 1745 | 1751 | ||
| 1746 | rt = skb_rtable(skb); | 1752 | rt = skb_rtable(skb); |
| 1747 | if (rt) | 1753 | if (rt) |
| 1748 | dst_set_expires(&rt->u.dst, 0); | 1754 | dst_set_expires(&rt->dst, 0); |
| 1749 | } | 1755 | } |
| 1750 | 1756 | ||
| 1751 | static int ip_rt_bug(struct sk_buff *skb) | 1757 | static int ip_rt_bug(struct sk_buff *skb) |
| @@ -1773,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
| 1773 | 1779 | ||
| 1774 | if (rt->fl.iif == 0) | 1780 | if (rt->fl.iif == 0) |
| 1775 | src = rt->rt_src; | 1781 | src = rt->rt_src; |
| 1776 | else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { | 1782 | else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { |
| 1777 | src = FIB_RES_PREFSRC(res); | 1783 | src = FIB_RES_PREFSRC(res); |
| 1778 | fib_res_put(&res); | 1784 | fib_res_put(&res); |
| 1779 | } else | 1785 | } else |
| 1780 | src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, | 1786 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, |
| 1781 | RT_SCOPE_UNIVERSE); | 1787 | RT_SCOPE_UNIVERSE); |
| 1782 | memcpy(addr, &src, 4); | 1788 | memcpy(addr, &src, 4); |
| 1783 | } | 1789 | } |
| @@ -1785,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
| 1785 | #ifdef CONFIG_NET_CLS_ROUTE | 1791 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1786 | static void set_class_tag(struct rtable *rt, u32 tag) | 1792 | static void set_class_tag(struct rtable *rt, u32 tag) |
| 1787 | { | 1793 | { |
| 1788 | if (!(rt->u.dst.tclassid & 0xFFFF)) | 1794 | if (!(rt->dst.tclassid & 0xFFFF)) |
| 1789 | rt->u.dst.tclassid |= tag & 0xFFFF; | 1795 | rt->dst.tclassid |= tag & 0xFFFF; |
| 1790 | if (!(rt->u.dst.tclassid & 0xFFFF0000)) | 1796 | if (!(rt->dst.tclassid & 0xFFFF0000)) |
| 1791 | rt->u.dst.tclassid |= tag & 0xFFFF0000; | 1797 | rt->dst.tclassid |= tag & 0xFFFF0000; |
| 1792 | } | 1798 | } |
| 1793 | #endif | 1799 | #endif |
| 1794 | 1800 | ||
| @@ -1800,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
| 1800 | if (FIB_RES_GW(*res) && | 1806 | if (FIB_RES_GW(*res) && |
| 1801 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1807 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
| 1802 | rt->rt_gateway = FIB_RES_GW(*res); | 1808 | rt->rt_gateway = FIB_RES_GW(*res); |
| 1803 | memcpy(rt->u.dst.metrics, fi->fib_metrics, | 1809 | memcpy(rt->dst.metrics, fi->fib_metrics, |
| 1804 | sizeof(rt->u.dst.metrics)); | 1810 | sizeof(rt->dst.metrics)); |
| 1805 | if (fi->fib_mtu == 0) { | 1811 | if (fi->fib_mtu == 0) { |
| 1806 | rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; | 1812 | rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; |
| 1807 | if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && | 1813 | if (dst_metric_locked(&rt->dst, RTAX_MTU) && |
| 1808 | rt->rt_gateway != rt->rt_dst && | 1814 | rt->rt_gateway != rt->rt_dst && |
| 1809 | rt->u.dst.dev->mtu > 576) | 1815 | rt->dst.dev->mtu > 576) |
| 1810 | rt->u.dst.metrics[RTAX_MTU-1] = 576; | 1816 | rt->dst.metrics[RTAX_MTU-1] = 576; |
| 1811 | } | 1817 | } |
| 1812 | #ifdef CONFIG_NET_CLS_ROUTE | 1818 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1813 | rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | 1819 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; |
| 1814 | #endif | 1820 | #endif |
| 1815 | } else | 1821 | } else |
| 1816 | rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; | 1822 | rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; |
| 1817 | 1823 | ||
| 1818 | if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) | 1824 | if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) |
| 1819 | rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; | 1825 | rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; |
| 1820 | if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) | 1826 | if (dst_mtu(&rt->dst) > IP_MAX_MTU) |
| 1821 | rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; | 1827 | rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; |
| 1822 | if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) | 1828 | if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) |
| 1823 | rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, | 1829 | rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, |
| 1824 | ip_rt_min_advmss); | 1830 | ip_rt_min_advmss); |
| 1825 | if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) | 1831 | if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) |
| 1826 | rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; | 1832 | rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; |
| 1827 | 1833 | ||
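The metric defaults above derive the advertised MSS from the device MTU minus 40 bytes of IPv4 + TCP headers, floored at ip_rt_min_advmss and capped at 65535 - 40. Condensed into one helper as a sketch (the kernel keeps this inline, as shown above):

#include <linux/kernel.h>

static u32 default_advmss_sketch(u32 mtu, u32 min_advmss)
{
	u32 advmss = max_t(u32, mtu - 40, min_advmss);

	return min_t(u32, advmss, 65535 - 40);
}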
| 1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1834 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1829 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1835 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
| @@ -1834,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
| 1834 | rt->rt_type = res->type; | 1840 | rt->rt_type = res->type; |
| 1835 | } | 1841 | } |
| 1836 | 1842 | ||
| 1843 | /* called in rcu_read_lock() section */ | ||
| 1837 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1844 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
| 1838 | u8 tos, struct net_device *dev, int our) | 1845 | u8 tos, struct net_device *dev, int our) |
| 1839 | { | 1846 | { |
| 1840 | unsigned hash; | 1847 | unsigned int hash; |
| 1841 | struct rtable *rth; | 1848 | struct rtable *rth; |
| 1842 | __be32 spec_dst; | 1849 | __be32 spec_dst; |
| 1843 | struct in_device *in_dev = in_dev_get(dev); | 1850 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 1844 | u32 itag = 0; | 1851 | u32 itag = 0; |
| 1852 | int err; | ||
| 1845 | 1853 | ||
| 1846 | /* Primary sanity checks. */ | 1854 | /* Primary sanity checks. */ |
| 1847 | 1855 | ||
| @@ -1856,20 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1856 | if (!ipv4_is_local_multicast(daddr)) | 1864 | if (!ipv4_is_local_multicast(daddr)) |
| 1857 | goto e_inval; | 1865 | goto e_inval; |
| 1858 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1866 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
| 1859 | } else if (fib_validate_source(saddr, 0, tos, 0, | 1867 | } else { |
| 1860 | dev, &spec_dst, &itag, 0) < 0) | 1868 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
| 1861 | goto e_inval; | 1869 | &itag, 0); |
| 1862 | 1870 | if (err < 0) | |
| 1871 | goto e_err; | ||
| 1872 | } | ||
| 1863 | rth = dst_alloc(&ipv4_dst_ops); | 1873 | rth = dst_alloc(&ipv4_dst_ops); |
| 1864 | if (!rth) | 1874 | if (!rth) |
| 1865 | goto e_nobufs; | 1875 | goto e_nobufs; |
| 1866 | 1876 | ||
| 1867 | rth->u.dst.output= ip_rt_bug; | 1877 | rth->dst.output = ip_rt_bug; |
| 1878 | rth->dst.obsolete = -1; | ||
| 1868 | 1879 | ||
| 1869 | atomic_set(&rth->u.dst.__refcnt, 1); | 1880 | atomic_set(&rth->dst.__refcnt, 1); |
| 1870 | rth->u.dst.flags= DST_HOST; | 1881 | rth->dst.flags= DST_HOST; |
| 1871 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 1882 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 1872 | rth->u.dst.flags |= DST_NOPOLICY; | 1883 | rth->dst.flags |= DST_NOPOLICY; |
| 1873 | rth->fl.fl4_dst = daddr; | 1884 | rth->fl.fl4_dst = daddr; |
| 1874 | rth->rt_dst = daddr; | 1885 | rth->rt_dst = daddr; |
| 1875 | rth->fl.fl4_tos = tos; | 1886 | rth->fl.fl4_tos = tos; |
| @@ -1877,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1877 | rth->fl.fl4_src = saddr; | 1888 | rth->fl.fl4_src = saddr; |
| 1878 | rth->rt_src = saddr; | 1889 | rth->rt_src = saddr; |
| 1879 | #ifdef CONFIG_NET_CLS_ROUTE | 1890 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1880 | rth->u.dst.tclassid = itag; | 1891 | rth->dst.tclassid = itag; |
| 1881 | #endif | 1892 | #endif |
| 1882 | rth->rt_iif = | 1893 | rth->rt_iif = |
| 1883 | rth->fl.iif = dev->ifindex; | 1894 | rth->fl.iif = dev->ifindex; |
| 1884 | rth->u.dst.dev = init_net.loopback_dev; | 1895 | rth->dst.dev = init_net.loopback_dev; |
| 1885 | dev_hold(rth->u.dst.dev); | 1896 | dev_hold(rth->dst.dev); |
| 1886 | rth->idev = in_dev_get(rth->u.dst.dev); | 1897 | rth->idev = in_dev_get(rth->dst.dev); |
| 1887 | rth->fl.oif = 0; | 1898 | rth->fl.oif = 0; |
| 1888 | rth->rt_gateway = daddr; | 1899 | rth->rt_gateway = daddr; |
| 1889 | rth->rt_spec_dst= spec_dst; | 1900 | rth->rt_spec_dst= spec_dst; |
| @@ -1891,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1891 | rth->rt_flags = RTCF_MULTICAST; | 1902 | rth->rt_flags = RTCF_MULTICAST; |
| 1892 | rth->rt_type = RTN_MULTICAST; | 1903 | rth->rt_type = RTN_MULTICAST; |
| 1893 | if (our) { | 1904 | if (our) { |
| 1894 | rth->u.dst.input= ip_local_deliver; | 1905 | rth->dst.input= ip_local_deliver; |
| 1895 | rth->rt_flags |= RTCF_LOCAL; | 1906 | rth->rt_flags |= RTCF_LOCAL; |
| 1896 | } | 1907 | } |
| 1897 | 1908 | ||
| 1898 | #ifdef CONFIG_IP_MROUTE | 1909 | #ifdef CONFIG_IP_MROUTE |
| 1899 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) | 1910 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) |
| 1900 | rth->u.dst.input = ip_mr_input; | 1911 | rth->dst.input = ip_mr_input; |
| 1901 | #endif | 1912 | #endif |
| 1902 | RT_CACHE_STAT_INC(in_slow_mc); | 1913 | RT_CACHE_STAT_INC(in_slow_mc); |
| 1903 | 1914 | ||
| 1904 | in_dev_put(in_dev); | ||
| 1905 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1915 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
| 1906 | return rt_intern_hash(hash, rth, NULL, skb); | 1916 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); |
| 1907 | 1917 | ||
| 1908 | e_nobufs: | 1918 | e_nobufs: |
| 1909 | in_dev_put(in_dev); | ||
| 1910 | return -ENOBUFS; | 1919 | return -ENOBUFS; |
| 1911 | |||
| 1912 | e_inval: | 1920 | e_inval: |
| 1913 | in_dev_put(in_dev); | ||
| 1914 | return -EINVAL; | 1921 | return -EINVAL; |
| 1922 | e_err: | ||
| 1923 | return err; | ||
| 1915 | } | 1924 | } |
| 1916 | 1925 | ||
| 1917 | 1926 | ||
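The restructured multicast path above stops flattening fib_validate_source() failures into -EINVAL; the new e_err label hands the original errno back to the caller. Schematically (a sketch wrapping the call exactly as it appears above):

static int mc_source_check_sketch(__be32 saddr, u8 tos,
				  struct net_device *dev,
				  __be32 *spec_dst, u32 *itag)
{
	int err = fib_validate_source(saddr, 0, tos, 0, dev,
				      spec_dst, itag, 0);

	return err < 0 ? err : 0;	/* keep the specific errno */
}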
| @@ -1945,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
| 1945 | #endif | 1954 | #endif |
| 1946 | } | 1955 | } |
| 1947 | 1956 | ||
| 1957 | /* called in rcu_read_lock() section */ | ||
| 1948 | static int __mkroute_input(struct sk_buff *skb, | 1958 | static int __mkroute_input(struct sk_buff *skb, |
| 1949 | struct fib_result *res, | 1959 | struct fib_result *res, |
| 1950 | struct in_device *in_dev, | 1960 | struct in_device *in_dev, |
| 1951 | __be32 daddr, __be32 saddr, u32 tos, | 1961 | __be32 daddr, __be32 saddr, u32 tos, |
| 1952 | struct rtable **result) | 1962 | struct rtable **result) |
| 1953 | { | 1963 | { |
| 1954 | |||
| 1955 | struct rtable *rth; | 1964 | struct rtable *rth; |
| 1956 | int err; | 1965 | int err; |
| 1957 | struct in_device *out_dev; | 1966 | struct in_device *out_dev; |
| 1958 | unsigned flags = 0; | 1967 | unsigned int flags = 0; |
| 1959 | __be32 spec_dst; | 1968 | __be32 spec_dst; |
| 1960 | u32 itag; | 1969 | u32 itag; |
| 1961 | 1970 | ||
| 1962 | /* get a working reference to the output device */ | 1971 | /* get a working reference to the output device */ |
| 1963 | out_dev = in_dev_get(FIB_RES_DEV(*res)); | 1972 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
| 1964 | if (out_dev == NULL) { | 1973 | if (out_dev == NULL) { |
| 1965 | if (net_ratelimit()) | 1974 | if (net_ratelimit()) |
| 1966 | printk(KERN_CRIT "Bug in ip_route_input" \ | 1975 | printk(KERN_CRIT "Bug in ip_route_input" \ |
| @@ -1975,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1975 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1984 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
| 1976 | saddr); | 1985 | saddr); |
| 1977 | 1986 | ||
| 1978 | err = -EINVAL; | ||
| 1979 | goto cleanup; | 1987 | goto cleanup; |
| 1980 | } | 1988 | } |
| 1981 | 1989 | ||
| @@ -1990,8 +1998,13 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1990 | if (skb->protocol != htons(ETH_P_IP)) { | 1998 | if (skb->protocol != htons(ETH_P_IP)) { |
| 1991 | /* Not IP (i.e. ARP). Do not create route, if it is | 1999 | /* Not IP (i.e. ARP). Do not create route, if it is |
| 1992 | * invalid for proxy arp. DNAT routes are always valid. | 2000 | * invalid for proxy arp. DNAT routes are always valid. |
| 2001 | * | ||
| 2002 | * The proxy arp feature has been extended to allow ARP | ||
| 2003 | * replies back to the same interface, to support | ||
| 2004 | * Private VLAN switch technologies. See arp.c. | ||
| 1993 | */ | 2005 | */ |
| 1994 | if (out_dev == in_dev) { | 2006 | if (out_dev == in_dev && |
| 2007 | IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { | ||
| 1995 | err = -EINVAL; | 2008 | err = -EINVAL; |
| 1996 | goto cleanup; | 2009 | goto cleanup; |
| 1997 | } | 2010 | } |
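The out_dev == in_dev test above used to reject any non-IP (ARP) route hairpinned back out of its ingress interface; it is now accepted when the administrator has enabled proxy_arp_pvlan on that interface. The predicate in isolation, as a sketch (helper name assumed):

#include <linux/inetdevice.h>

static bool hairpin_ok_sketch(struct in_device *in_dev,
			      struct in_device *out_dev)
{
	/* same-interface forwarding is only valid in Private VLAN
	 * proxy-ARP setups */
	return out_dev != in_dev || IN_DEV_PROXY_ARP_PVLAN(in_dev);
}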
| @@ -2004,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2004 | goto cleanup; | 2017 | goto cleanup; |
| 2005 | } | 2018 | } |
| 2006 | 2019 | ||
| 2007 | atomic_set(&rth->u.dst.__refcnt, 1); | 2020 | atomic_set(&rth->dst.__refcnt, 1); |
| 2008 | rth->u.dst.flags= DST_HOST; | 2021 | rth->dst.flags= DST_HOST; |
| 2009 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2022 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 2010 | rth->u.dst.flags |= DST_NOPOLICY; | 2023 | rth->dst.flags |= DST_NOPOLICY; |
| 2011 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | 2024 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) |
| 2012 | rth->u.dst.flags |= DST_NOXFRM; | 2025 | rth->dst.flags |= DST_NOXFRM; |
| 2013 | rth->fl.fl4_dst = daddr; | 2026 | rth->fl.fl4_dst = daddr; |
| 2014 | rth->rt_dst = daddr; | 2027 | rth->rt_dst = daddr; |
| 2015 | rth->fl.fl4_tos = tos; | 2028 | rth->fl.fl4_tos = tos; |
| @@ -2019,15 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2019 | rth->rt_gateway = daddr; | 2032 | rth->rt_gateway = daddr; |
| 2020 | rth->rt_iif = | 2033 | rth->rt_iif = |
| 2021 | rth->fl.iif = in_dev->dev->ifindex; | 2034 | rth->fl.iif = in_dev->dev->ifindex; |
| 2022 | rth->u.dst.dev = (out_dev)->dev; | 2035 | rth->dst.dev = (out_dev)->dev; |
| 2023 | dev_hold(rth->u.dst.dev); | 2036 | dev_hold(rth->dst.dev); |
| 2024 | rth->idev = in_dev_get(rth->u.dst.dev); | 2037 | rth->idev = in_dev_get(rth->dst.dev); |
| 2025 | rth->fl.oif = 0; | 2038 | rth->fl.oif = 0; |
| 2026 | rth->rt_spec_dst= spec_dst; | 2039 | rth->rt_spec_dst= spec_dst; |
| 2027 | 2040 | ||
| 2028 | rth->u.dst.input = ip_forward; | 2041 | rth->dst.obsolete = -1; |
| 2029 | rth->u.dst.output = ip_output; | 2042 | rth->dst.input = ip_forward; |
| 2030 | rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); | 2043 | rth->dst.output = ip_output; |
| 2044 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | ||
| 2031 | 2045 | ||
| 2032 | rt_set_nexthop(rth, res, itag); | 2046 | rt_set_nexthop(rth, res, itag); |
| 2033 | 2047 | ||
| @@ -2036,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2036 | *result = rth; | 2050 | *result = rth; |
| 2037 | err = 0; | 2051 | err = 0; |
| 2038 | cleanup: | 2052 | cleanup: |
| 2039 | /* release the working reference to the output device */ | ||
| 2040 | in_dev_put(out_dev); | ||
| 2041 | return err; | 2053 | return err; |
| 2042 | } | 2054 | } |
| 2043 | 2055 | ||
| @@ -2063,8 +2075,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
| 2063 | 2075 | ||
| 2064 | /* put it into the cache */ | 2076 | /* put it into the cache */ |
| 2065 | hash = rt_hash(daddr, saddr, fl->iif, | 2077 | hash = rt_hash(daddr, saddr, fl->iif, |
| 2066 | rt_genid(dev_net(rth->u.dst.dev))); | 2078 | rt_genid(dev_net(rth->dst.dev))); |
| 2067 | return rt_intern_hash(hash, rth, NULL, skb); | 2079 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); |
| 2068 | } | 2080 | } |
| 2069 | 2081 | ||
| 2070 | /* | 2082 | /* |
| @@ -2081,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2081 | u8 tos, struct net_device *dev) | 2093 | u8 tos, struct net_device *dev) |
| 2082 | { | 2094 | { |
| 2083 | struct fib_result res; | 2095 | struct fib_result res; |
| 2084 | struct in_device *in_dev = in_dev_get(dev); | 2096 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 2085 | struct flowi fl = { .nl_u = { .ip4_u = | 2097 | struct flowi fl = { .nl_u = { .ip4_u = |
| 2086 | { .daddr = daddr, | 2098 | { .daddr = daddr, |
| 2087 | .saddr = saddr, | 2099 | .saddr = saddr, |
| @@ -2141,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2141 | goto brd_input; | 2153 | goto brd_input; |
| 2142 | 2154 | ||
| 2143 | if (res.type == RTN_LOCAL) { | 2155 | if (res.type == RTN_LOCAL) { |
| 2144 | int result; | 2156 | err = fib_validate_source(saddr, daddr, tos, |
| 2145 | result = fib_validate_source(saddr, daddr, tos, | ||
| 2146 | net->loopback_dev->ifindex, | 2157 | net->loopback_dev->ifindex, |
| 2147 | dev, &spec_dst, &itag, skb->mark); | 2158 | dev, &spec_dst, &itag, skb->mark); |
| 2148 | if (result < 0) | 2159 | if (err < 0) |
| 2149 | goto martian_source; | 2160 | goto martian_source_keep_err; |
| 2150 | if (result) | 2161 | if (err) |
| 2151 | flags |= RTCF_DIRECTSRC; | 2162 | flags |= RTCF_DIRECTSRC; |
| 2152 | spec_dst = daddr; | 2163 | spec_dst = daddr; |
| 2153 | goto local_input; | 2164 | goto local_input; |
| @@ -2160,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2160 | 2171 | ||
| 2161 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2172 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); |
| 2162 | done: | 2173 | done: |
| 2163 | in_dev_put(in_dev); | ||
| 2164 | if (free_res) | 2174 | if (free_res) |
| 2165 | fib_res_put(&res); | 2175 | fib_res_put(&res); |
| 2166 | out: return err; | 2176 | out: return err; |
| @@ -2175,7 +2185,7 @@ brd_input: | |||
| 2175 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2185 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
| 2176 | &itag, skb->mark); | 2186 | &itag, skb->mark); |
| 2177 | if (err < 0) | 2187 | if (err < 0) |
| 2178 | goto martian_source; | 2188 | goto martian_source_keep_err; |
| 2179 | if (err) | 2189 | if (err) |
| 2180 | flags |= RTCF_DIRECTSRC; | 2190 | flags |= RTCF_DIRECTSRC; |
| 2181 | } | 2191 | } |
| @@ -2188,13 +2198,14 @@ local_input: | |||
| 2188 | if (!rth) | 2198 | if (!rth) |
| 2189 | goto e_nobufs; | 2199 | goto e_nobufs; |
| 2190 | 2200 | ||
| 2191 | rth->u.dst.output= ip_rt_bug; | 2201 | rth->dst.output= ip_rt_bug; |
| 2202 | rth->dst.obsolete = -1; | ||
| 2192 | rth->rt_genid = rt_genid(net); | 2203 | rth->rt_genid = rt_genid(net); |
| 2193 | 2204 | ||
| 2194 | atomic_set(&rth->u.dst.__refcnt, 1); | 2205 | atomic_set(&rth->dst.__refcnt, 1); |
| 2195 | rth->u.dst.flags= DST_HOST; | 2206 | rth->dst.flags= DST_HOST; |
| 2196 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2207 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 2197 | rth->u.dst.flags |= DST_NOPOLICY; | 2208 | rth->dst.flags |= DST_NOPOLICY; |
| 2198 | rth->fl.fl4_dst = daddr; | 2209 | rth->fl.fl4_dst = daddr; |
| 2199 | rth->rt_dst = daddr; | 2210 | rth->rt_dst = daddr; |
| 2200 | rth->fl.fl4_tos = tos; | 2211 | rth->fl.fl4_tos = tos; |
| @@ -2202,25 +2213,25 @@ local_input: | |||
| 2202 | rth->fl.fl4_src = saddr; | 2213 | rth->fl.fl4_src = saddr; |
| 2203 | rth->rt_src = saddr; | 2214 | rth->rt_src = saddr; |
| 2204 | #ifdef CONFIG_NET_CLS_ROUTE | 2215 | #ifdef CONFIG_NET_CLS_ROUTE |
| 2205 | rth->u.dst.tclassid = itag; | 2216 | rth->dst.tclassid = itag; |
| 2206 | #endif | 2217 | #endif |
| 2207 | rth->rt_iif = | 2218 | rth->rt_iif = |
| 2208 | rth->fl.iif = dev->ifindex; | 2219 | rth->fl.iif = dev->ifindex; |
| 2209 | rth->u.dst.dev = net->loopback_dev; | 2220 | rth->dst.dev = net->loopback_dev; |
| 2210 | dev_hold(rth->u.dst.dev); | 2221 | dev_hold(rth->dst.dev); |
| 2211 | rth->idev = in_dev_get(rth->u.dst.dev); | 2222 | rth->idev = in_dev_get(rth->dst.dev); |
| 2212 | rth->rt_gateway = daddr; | 2223 | rth->rt_gateway = daddr; |
| 2213 | rth->rt_spec_dst= spec_dst; | 2224 | rth->rt_spec_dst= spec_dst; |
| 2214 | rth->u.dst.input= ip_local_deliver; | 2225 | rth->dst.input= ip_local_deliver; |
| 2215 | rth->rt_flags = flags|RTCF_LOCAL; | 2226 | rth->rt_flags = flags|RTCF_LOCAL; |
| 2216 | if (res.type == RTN_UNREACHABLE) { | 2227 | if (res.type == RTN_UNREACHABLE) { |
| 2217 | rth->u.dst.input= ip_error; | 2228 | rth->dst.input= ip_error; |
| 2218 | rth->u.dst.error= -err; | 2229 | rth->dst.error= -err; |
| 2219 | rth->rt_flags &= ~RTCF_LOCAL; | 2230 | rth->rt_flags &= ~RTCF_LOCAL; |
| 2220 | } | 2231 | } |
| 2221 | rth->rt_type = res.type; | 2232 | rth->rt_type = res.type; |
| 2222 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2233 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); |
| 2223 | err = rt_intern_hash(hash, rth, NULL, skb); | 2234 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); |
| 2224 | goto done; | 2235 | goto done; |
| 2225 | 2236 | ||
| 2226 | no_route: | 2237 | no_route: |
| @@ -2255,46 +2266,54 @@ e_nobufs: | |||
| 2255 | goto done; | 2266 | goto done; |
| 2256 | 2267 | ||
| 2257 | martian_source: | 2268 | martian_source: |
| 2269 | err = -EINVAL; | ||
| 2270 | martian_source_keep_err: | ||
| 2258 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 2271 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
| 2259 | goto e_inval; | 2272 | goto done; |
| 2260 | } | 2273 | } |
| 2261 | 2274 | ||
| 2262 | int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2275 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
| 2263 | u8 tos, struct net_device *dev) | 2276 | u8 tos, struct net_device *dev, bool noref) |
| 2264 | { | 2277 | { |
| 2265 | struct rtable * rth; | 2278 | struct rtable * rth; |
| 2266 | unsigned hash; | 2279 | unsigned hash; |
| 2267 | int iif = dev->ifindex; | 2280 | int iif = dev->ifindex; |
| 2268 | struct net *net; | 2281 | struct net *net; |
| 2282 | int res; | ||
| 2269 | 2283 | ||
| 2270 | net = dev_net(dev); | 2284 | net = dev_net(dev); |
| 2271 | 2285 | ||
| 2286 | rcu_read_lock(); | ||
| 2287 | |||
| 2272 | if (!rt_caching(net)) | 2288 | if (!rt_caching(net)) |
| 2273 | goto skip_cache; | 2289 | goto skip_cache; |
| 2274 | 2290 | ||
| 2275 | tos &= IPTOS_RT_MASK; | 2291 | tos &= IPTOS_RT_MASK; |
| 2276 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); | 2292 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); |
| 2277 | 2293 | ||
| 2278 | rcu_read_lock(); | ||
| 2279 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2294 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
| 2280 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2295 | rth = rcu_dereference(rth->dst.rt_next)) { |
| 2281 | if (((rth->fl.fl4_dst ^ daddr) | | 2296 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | |
| 2282 | (rth->fl.fl4_src ^ saddr) | | 2297 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | |
| 2283 | (rth->fl.iif ^ iif) | | 2298 | (rth->fl.iif ^ iif) | |
| 2284 | rth->fl.oif | | 2299 | rth->fl.oif | |
| 2285 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2300 | (rth->fl.fl4_tos ^ tos)) == 0 && |
| 2286 | rth->fl.mark == skb->mark && | 2301 | rth->fl.mark == skb->mark && |
| 2287 | net_eq(dev_net(rth->u.dst.dev), net) && | 2302 | net_eq(dev_net(rth->dst.dev), net) && |
| 2288 | !rt_is_expired(rth)) { | 2303 | !rt_is_expired(rth)) { |
| 2289 | dst_use(&rth->u.dst, jiffies); | 2304 | if (noref) { |
| 2305 | dst_use_noref(&rth->dst, jiffies); | ||
| 2306 | skb_dst_set_noref(skb, &rth->dst); | ||
| 2307 | } else { | ||
| 2308 | dst_use(&rth->dst, jiffies); | ||
| 2309 | skb_dst_set(skb, &rth->dst); | ||
| 2310 | } | ||
| 2290 | RT_CACHE_STAT_INC(in_hit); | 2311 | RT_CACHE_STAT_INC(in_hit); |
| 2291 | rcu_read_unlock(); | 2312 | rcu_read_unlock(); |
| 2292 | skb_dst_set(skb, &rth->u.dst); | ||
| 2293 | return 0; | 2313 | return 0; |
| 2294 | } | 2314 | } |
| 2295 | RT_CACHE_STAT_INC(in_hlist_search); | 2315 | RT_CACHE_STAT_INC(in_hlist_search); |
| 2296 | } | 2316 | } |
| 2297 | rcu_read_unlock(); | ||
| 2298 | 2317 | ||
| 2299 | skip_cache: | 2318 | skip_cache: |
| 2300 | /* Multicast recognition logic is moved from route cache to here. | 2319 | /* Multicast recognition logic is moved from route cache to here. |
| @@ -2309,12 +2328,11 @@ skip_cache: | |||
| 2309 | route cache entry is created eventually. | 2328 | route cache entry is created eventually. |
| 2310 | */ | 2329 | */ |
| 2311 | if (ipv4_is_multicast(daddr)) { | 2330 | if (ipv4_is_multicast(daddr)) { |
| 2312 | struct in_device *in_dev; | 2331 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 2313 | 2332 | ||
| 2314 | rcu_read_lock(); | 2333 | if (in_dev) { |
| 2315 | if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { | ||
| 2316 | int our = ip_check_mc(in_dev, daddr, saddr, | 2334 | int our = ip_check_mc(in_dev, daddr, saddr, |
| 2317 | ip_hdr(skb)->protocol); | 2335 | ip_hdr(skb)->protocol); |
| 2318 | if (our | 2336 | if (our |
| 2319 | #ifdef CONFIG_IP_MROUTE | 2337 | #ifdef CONFIG_IP_MROUTE |
| 2320 | || | 2338 | || |
| @@ -2322,16 +2340,20 @@ skip_cache: | |||
| 2322 | IN_DEV_MFORWARD(in_dev)) | 2340 | IN_DEV_MFORWARD(in_dev)) |
| 2323 | #endif | 2341 | #endif |
| 2324 | ) { | 2342 | ) { |
| 2343 | int res = ip_route_input_mc(skb, daddr, saddr, | ||
| 2344 | tos, dev, our); | ||
| 2325 | rcu_read_unlock(); | 2345 | rcu_read_unlock(); |
| 2326 | return ip_route_input_mc(skb, daddr, saddr, | 2346 | return res; |
| 2327 | tos, dev, our); | ||
| 2328 | } | 2347 | } |
| 2329 | } | 2348 | } |
| 2330 | rcu_read_unlock(); | 2349 | rcu_read_unlock(); |
| 2331 | return -EINVAL; | 2350 | return -EINVAL; |
| 2332 | } | 2351 | } |
| 2333 | return ip_route_input_slow(skb, daddr, saddr, tos, dev); | 2352 | res = ip_route_input_slow(skb, daddr, saddr, tos, dev); |
| 2353 | rcu_read_unlock(); | ||
| 2354 | return res; | ||
| 2334 | } | 2355 | } |
| 2356 | EXPORT_SYMBOL(ip_route_input_common); | ||
| 2335 | 2357 | ||
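The new bool noref parameter implies two thin entry points: the historical refcounted lookup, and a noref variant for callers that only touch the skb's dst inside an RCU section. Presumably they sit in include/net/route.h along these lines (a sketch; only ip_route_input_common itself is visible in this hunk):

static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
				 u8 tos, struct net_device *devin)
{
	return ip_route_input_common(skb, dst, src, tos, devin, false);
}

static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst,
				       __be32 src, u8 tos,
				       struct net_device *devin)
{
	/* caller must confine skb->dst usage to an RCU read section */
	return ip_route_input_common(skb, dst, src, tos, devin, true);
}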
| 2336 | static int __mkroute_output(struct rtable **result, | 2358 | static int __mkroute_output(struct rtable **result, |
| 2337 | struct fib_result *res, | 2359 | struct fib_result *res, |
| @@ -2391,12 +2413,12 @@ static int __mkroute_output(struct rtable **result, | |||
| 2391 | goto cleanup; | 2413 | goto cleanup; |
| 2392 | } | 2414 | } |
| 2393 | 2415 | ||
| 2394 | atomic_set(&rth->u.dst.__refcnt, 1); | 2416 | atomic_set(&rth->dst.__refcnt, 1); |
| 2395 | rth->u.dst.flags= DST_HOST; | 2417 | rth->dst.flags= DST_HOST; |
| 2396 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2418 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) |
| 2397 | rth->u.dst.flags |= DST_NOXFRM; | 2419 | rth->dst.flags |= DST_NOXFRM; |
| 2398 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2420 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 2399 | rth->u.dst.flags |= DST_NOPOLICY; | 2421 | rth->dst.flags |= DST_NOPOLICY; |
| 2400 | 2422 | ||
| 2401 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2423 | rth->fl.fl4_dst = oldflp->fl4_dst; |
| 2402 | rth->fl.fl4_tos = tos; | 2424 | rth->fl.fl4_tos = tos; |
| @@ -2408,34 +2430,35 @@ static int __mkroute_output(struct rtable **result, | |||
| 2408 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | 2430 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; |
| 2409 | /* get references to the devices that are to be held by the routing | 2431 | /* get references to the devices that are to be held by the routing |
| 2410 | cache entry */ | 2432 | cache entry */ |
| 2411 | rth->u.dst.dev = dev_out; | 2433 | rth->dst.dev = dev_out; |
| 2412 | dev_hold(dev_out); | 2434 | dev_hold(dev_out); |
| 2413 | rth->idev = in_dev_get(dev_out); | 2435 | rth->idev = in_dev_get(dev_out); |
| 2414 | rth->rt_gateway = fl->fl4_dst; | 2436 | rth->rt_gateway = fl->fl4_dst; |
| 2415 | rth->rt_spec_dst= fl->fl4_src; | 2437 | rth->rt_spec_dst= fl->fl4_src; |
| 2416 | 2438 | ||
| 2417 | rth->u.dst.output=ip_output; | 2439 | rth->dst.output=ip_output; |
| 2440 | rth->dst.obsolete = -1; | ||
| 2418 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2441 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
| 2419 | 2442 | ||
| 2420 | RT_CACHE_STAT_INC(out_slow_tot); | 2443 | RT_CACHE_STAT_INC(out_slow_tot); |
| 2421 | 2444 | ||
| 2422 | if (flags & RTCF_LOCAL) { | 2445 | if (flags & RTCF_LOCAL) { |
| 2423 | rth->u.dst.input = ip_local_deliver; | 2446 | rth->dst.input = ip_local_deliver; |
| 2424 | rth->rt_spec_dst = fl->fl4_dst; | 2447 | rth->rt_spec_dst = fl->fl4_dst; |
| 2425 | } | 2448 | } |
| 2426 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2449 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
| 2427 | rth->rt_spec_dst = fl->fl4_src; | 2450 | rth->rt_spec_dst = fl->fl4_src; |
| 2428 | if (flags & RTCF_LOCAL && | 2451 | if (flags & RTCF_LOCAL && |
| 2429 | !(dev_out->flags & IFF_LOOPBACK)) { | 2452 | !(dev_out->flags & IFF_LOOPBACK)) { |
| 2430 | rth->u.dst.output = ip_mc_output; | 2453 | rth->dst.output = ip_mc_output; |
| 2431 | RT_CACHE_STAT_INC(out_slow_mc); | 2454 | RT_CACHE_STAT_INC(out_slow_mc); |
| 2432 | } | 2455 | } |
| 2433 | #ifdef CONFIG_IP_MROUTE | 2456 | #ifdef CONFIG_IP_MROUTE |
| 2434 | if (res->type == RTN_MULTICAST) { | 2457 | if (res->type == RTN_MULTICAST) { |
| 2435 | if (IN_DEV_MFORWARD(in_dev) && | 2458 | if (IN_DEV_MFORWARD(in_dev) && |
| 2436 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2459 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { |
| 2437 | rth->u.dst.input = ip_mr_input; | 2460 | rth->dst.input = ip_mr_input; |
| 2438 | rth->u.dst.output = ip_mc_output; | 2461 | rth->dst.output = ip_mc_output; |
| 2439 | } | 2462 | } |
| 2440 | } | 2463 | } |
| 2441 | #endif | 2464 | #endif |
| @@ -2466,7 +2489,7 @@ static int ip_mkroute_output(struct rtable **rp, | |||
| 2466 | if (err == 0) { | 2489 | if (err == 0) { |
| 2467 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | 2490 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, |
| 2468 | rt_genid(dev_net(dev_out))); | 2491 | rt_genid(dev_net(dev_out))); |
| 2469 | err = rt_intern_hash(hash, rth, rp, NULL); | 2492 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); |
| 2470 | } | 2493 | } |
| 2471 | 2494 | ||
| 2472 | return err; | 2495 | return err; |
| @@ -2689,8 +2712,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
| 2689 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2712 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); |
| 2690 | 2713 | ||
| 2691 | rcu_read_lock_bh(); | 2714 | rcu_read_lock_bh(); |
| 2692 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2715 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
| 2693 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2716 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
| 2694 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2717 | if (rth->fl.fl4_dst == flp->fl4_dst && |
| 2695 | rth->fl.fl4_src == flp->fl4_src && | 2718 | rth->fl.fl4_src == flp->fl4_src && |
| 2696 | rth->fl.iif == 0 && | 2719 | rth->fl.iif == 0 && |
| @@ -2698,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
| 2698 | rth->fl.mark == flp->mark && | 2721 | rth->fl.mark == flp->mark && |
| 2699 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2722 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & |
| 2700 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2723 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
| 2701 | net_eq(dev_net(rth->u.dst.dev), net) && | 2724 | net_eq(dev_net(rth->dst.dev), net) && |
| 2702 | !rt_is_expired(rth)) { | 2725 | !rt_is_expired(rth)) { |
| 2703 | dst_use(&rth->u.dst, jiffies); | 2726 | dst_use(&rth->dst, jiffies); |
| 2704 | RT_CACHE_STAT_INC(out_hit); | 2727 | RT_CACHE_STAT_INC(out_hit); |
| 2705 | rcu_read_unlock_bh(); | 2728 | rcu_read_unlock_bh(); |
| 2706 | *rp = rth; | 2729 | *rp = rth; |
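The output-path lookup above runs under rcu_read_lock_bh(), so it now uses rcu_dereference_bh(): with CONFIG_PROVE_RCU, a plain rcu_dereference() inside a BH read-side section trips a lockdep splat even though the access is safe. The traversal pattern reduced to a sketch (the counting helper is assumed):

#include <linux/rcupdate.h>

static unsigned int chain_len_sketch(unsigned int hash)
{
	struct rtable *rth;
	unsigned int n = 0;

	rcu_read_lock_bh();
	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
	     rth = rcu_dereference_bh(rth->dst.rt_next))
		n++;
	rcu_read_unlock_bh();
	return n;
}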
| @@ -2713,9 +2736,13 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
| 2713 | slow_output: | 2736 | slow_output: |
| 2714 | return ip_route_output_slow(net, rp, flp); | 2737 | return ip_route_output_slow(net, rp, flp); |
| 2715 | } | 2738 | } |
| 2716 | |||
| 2717 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2739 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
| 2718 | 2740 | ||
| 2741 | static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) | ||
| 2742 | { | ||
| 2743 | return NULL; | ||
| 2744 | } | ||
| 2745 | |||
| 2719 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 2746 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
| 2720 | { | 2747 | { |
| 2721 | } | 2748 | } |
| @@ -2724,7 +2751,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
| 2724 | .family = AF_INET, | 2751 | .family = AF_INET, |
| 2725 | .protocol = cpu_to_be16(ETH_P_IP), | 2752 | .protocol = cpu_to_be16(ETH_P_IP), |
| 2726 | .destroy = ipv4_dst_destroy, | 2753 | .destroy = ipv4_dst_destroy, |
| 2727 | .check = ipv4_dst_check, | 2754 | .check = ipv4_blackhole_dst_check, |
| 2728 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2755 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
| 2729 | .entries = ATOMIC_INIT(0), | 2756 | .entries = ATOMIC_INIT(0), |
| 2730 | }; | 2757 | }; |
| @@ -2737,15 +2764,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
| 2737 | dst_alloc(&ipv4_dst_blackhole_ops); | 2764 | dst_alloc(&ipv4_dst_blackhole_ops); |
| 2738 | 2765 | ||
| 2739 | if (rt) { | 2766 | if (rt) { |
| 2740 | struct dst_entry *new = &rt->u.dst; | 2767 | struct dst_entry *new = &rt->dst; |
| 2741 | 2768 | ||
| 2742 | atomic_set(&new->__refcnt, 1); | 2769 | atomic_set(&new->__refcnt, 1); |
| 2743 | new->__use = 1; | 2770 | new->__use = 1; |
| 2744 | new->input = dst_discard; | 2771 | new->input = dst_discard; |
| 2745 | new->output = dst_discard; | 2772 | new->output = dst_discard; |
| 2746 | memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); | 2773 | memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); |
| 2747 | 2774 | ||
| 2748 | new->dev = ort->u.dst.dev; | 2775 | new->dev = ort->dst.dev; |
| 2749 | if (new->dev) | 2776 | if (new->dev) |
| 2750 | dev_hold(new->dev); | 2777 | dev_hold(new->dev); |
| 2751 | 2778 | ||
| @@ -2769,7 +2796,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
| 2769 | dst_free(new); | 2796 | dst_free(new); |
| 2770 | } | 2797 | } |
| 2771 | 2798 | ||
| 2772 | dst_release(&(*rp)->u.dst); | 2799 | dst_release(&(*rp)->dst); |
| 2773 | *rp = rt; | 2800 | *rp = rt; |
| 2774 | return (rt ? 0 : -ENOMEM); | 2801 | return (rt ? 0 : -ENOMEM); |
| 2775 | } | 2802 | } |
| @@ -2797,13 +2824,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | |||
| 2797 | 2824 | ||
| 2798 | return 0; | 2825 | return 0; |
| 2799 | } | 2826 | } |
| 2800 | |||
| 2801 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2827 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
| 2802 | 2828 | ||
| 2803 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | 2829 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) |
| 2804 | { | 2830 | { |
| 2805 | return ip_route_output_flow(net, rp, flp, NULL, 0); | 2831 | return ip_route_output_flow(net, rp, flp, NULL, 0); |
| 2806 | } | 2832 | } |
| 2833 | EXPORT_SYMBOL(ip_route_output_key); | ||
| 2807 | 2834 | ||
| 2808 | static int rt_fill_info(struct net *net, | 2835 | static int rt_fill_info(struct net *net, |
| 2809 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2836 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
| @@ -2839,11 +2866,11 @@ static int rt_fill_info(struct net *net, | |||
| 2839 | r->rtm_src_len = 32; | 2866 | r->rtm_src_len = 32; |
| 2840 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2867 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); |
| 2841 | } | 2868 | } |
| 2842 | if (rt->u.dst.dev) | 2869 | if (rt->dst.dev) |
| 2843 | NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); | 2870 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
| 2844 | #ifdef CONFIG_NET_CLS_ROUTE | 2871 | #ifdef CONFIG_NET_CLS_ROUTE |
| 2845 | if (rt->u.dst.tclassid) | 2872 | if (rt->dst.tclassid) |
| 2846 | NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); | 2873 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
| 2847 | #endif | 2874 | #endif |
| 2848 | if (rt->fl.iif) | 2875 | if (rt->fl.iif) |
| 2849 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2876 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
| @@ -2853,12 +2880,16 @@ static int rt_fill_info(struct net *net, | |||
| 2853 | if (rt->rt_dst != rt->rt_gateway) | 2880 | if (rt->rt_dst != rt->rt_gateway) |
| 2854 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2881 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); |
| 2855 | 2882 | ||
| 2856 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) | 2883 | if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) |
| 2857 | goto nla_put_failure; | 2884 | goto nla_put_failure; |
| 2858 | 2885 | ||
| 2859 | error = rt->u.dst.error; | 2886 | if (rt->fl.mark) |
| 2860 | expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; | 2887 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); |
| 2888 | |||
| 2889 | error = rt->dst.error; | ||
| 2890 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | ||
| 2861 | if (rt->peer) { | 2891 | if (rt->peer) { |
| 2892 | inet_peer_refcheck(rt->peer); | ||
| 2862 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2893 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
| 2863 | if (rt->peer->tcp_ts_stamp) { | 2894 | if (rt->peer->tcp_ts_stamp) { |
| 2864 | ts = rt->peer->tcp_ts; | 2895 | ts = rt->peer->tcp_ts; |
| @@ -2889,7 +2920,7 @@ static int rt_fill_info(struct net *net, | |||
| 2889 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2920 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); |
| 2890 | } | 2921 | } |
| 2891 | 2922 | ||
| 2892 | if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, | 2923 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
| 2893 | expires, error) < 0) | 2924 | expires, error) < 0) |
| 2894 | goto nla_put_failure; | 2925 | goto nla_put_failure; |
| 2895 | 2926 | ||
| @@ -2910,6 +2941,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2910 | __be32 src = 0; | 2941 | __be32 src = 0; |
| 2911 | u32 iif; | 2942 | u32 iif; |
| 2912 | int err; | 2943 | int err; |
| 2944 | int mark; | ||
| 2913 | struct sk_buff *skb; | 2945 | struct sk_buff *skb; |
| 2914 | 2946 | ||
| 2915 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); | 2947 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); |
| @@ -2937,6 +2969,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2937 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; | 2969 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; |
| 2938 | dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; | 2970 | dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; |
| 2939 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; | 2971 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; |
| 2972 | mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; | ||
| 2940 | 2973 | ||
| 2941 | if (iif) { | 2974 | if (iif) { |
| 2942 | struct net_device *dev; | 2975 | struct net_device *dev; |
| @@ -2949,13 +2982,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2949 | 2982 | ||
| 2950 | skb->protocol = htons(ETH_P_IP); | 2983 | skb->protocol = htons(ETH_P_IP); |
| 2951 | skb->dev = dev; | 2984 | skb->dev = dev; |
| 2985 | skb->mark = mark; | ||
| 2952 | local_bh_disable(); | 2986 | local_bh_disable(); |
| 2953 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); | 2987 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); |
| 2954 | local_bh_enable(); | 2988 | local_bh_enable(); |
| 2955 | 2989 | ||
| 2956 | rt = skb_rtable(skb); | 2990 | rt = skb_rtable(skb); |
| 2957 | if (err == 0 && rt->u.dst.error) | 2991 | if (err == 0 && rt->dst.error) |
| 2958 | err = -rt->u.dst.error; | 2992 | err = -rt->dst.error; |
| 2959 | } else { | 2993 | } else { |
| 2960 | struct flowi fl = { | 2994 | struct flowi fl = { |
| 2961 | .nl_u = { | 2995 | .nl_u = { |
| @@ -2966,6 +3000,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2966 | }, | 3000 | }, |
| 2967 | }, | 3001 | }, |
| 2968 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 3002 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
| 3003 | .mark = mark, | ||
| 2969 | }; | 3004 | }; |
| 2970 | err = ip_route_output_key(net, &rt, &fl); | 3005 | err = ip_route_output_key(net, &rt, &fl); |
| 2971 | } | 3006 | } |
| @@ -2973,7 +3008,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2973 | if (err) | 3008 | if (err) |
| 2974 | goto errout_free; | 3009 | goto errout_free; |
| 2975 | 3010 | ||
| 2976 | skb_dst_set(skb, &rt->u.dst); | 3011 | skb_dst_set(skb, &rt->dst); |
| 2977 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 3012 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
| 2978 | rt->rt_flags |= RTCF_NOTIFY; | 3013 | rt->rt_flags |= RTCF_NOTIFY; |
| 2979 | 3014 | ||
| @@ -3008,13 +3043,13 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 3008 | if (!rt_hash_table[h].chain) | 3043 | if (!rt_hash_table[h].chain) |
| 3009 | continue; | 3044 | continue; |
| 3010 | rcu_read_lock_bh(); | 3045 | rcu_read_lock_bh(); |
| 3011 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 3046 | for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; |
| 3012 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 3047 | rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { |
| 3013 | if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) | 3048 | if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) |
| 3014 | continue; | 3049 | continue; |
| 3015 | if (rt_is_expired(rt)) | 3050 | if (rt_is_expired(rt)) |
| 3016 | continue; | 3051 | continue; |
| 3017 | skb_dst_set(skb, dst_clone(&rt->u.dst)); | 3052 | skb_dst_set_noref(skb, &rt->dst); |
| 3018 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, | 3053 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, |
| 3019 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | 3054 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, |
| 3020 | 1, NLM_F_MULTI) <= 0) { | 3055 | 1, NLM_F_MULTI) <= 0) { |
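Replacing dst_clone() with skb_dst_set_noref() in the dump loop above saves a refcount round-trip per cache entry; the contract is that a noref dst must never outlive the enclosing RCU read-side section, which holds here because the skb's dst is consumed and presumably dropped again before rcu_read_unlock_bh(). A sketch of that discipline (helper name assumed):

static void dump_one_route_sketch(struct sk_buff *skb, struct rtable *rt)
{
	/* caller holds rcu_read_lock_bh(); rt stays valid until unlock */
	skb_dst_set_noref(skb, &rt->dst);
	/* ... rt_fill_info() reads skb_rtable(skb) here ... */
	skb_dst_drop(skb);	/* drop before leaving the RCU section */
}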
| @@ -3060,50 +3095,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | |||
| 3060 | return -EINVAL; | 3095 | return -EINVAL; |
| 3061 | } | 3096 | } |
| 3062 | 3097 | ||
| 3063 | static void rt_secret_reschedule(int old) | ||
| 3064 | { | ||
| 3065 | struct net *net; | ||
| 3066 | int new = ip_rt_secret_interval; | ||
| 3067 | int diff = new - old; | ||
| 3068 | |||
| 3069 | if (!diff) | ||
| 3070 | return; | ||
| 3071 | |||
| 3072 | rtnl_lock(); | ||
| 3073 | for_each_net(net) { | ||
| 3074 | int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); | ||
| 3075 | |||
| 3076 | if (!new) | ||
| 3077 | continue; | ||
| 3078 | |||
| 3079 | if (deleted) { | ||
| 3080 | long time = net->ipv4.rt_secret_timer.expires - jiffies; | ||
| 3081 | |||
| 3082 | if (time <= 0 || (time += diff) <= 0) | ||
| 3083 | time = 0; | ||
| 3084 | |||
| 3085 | net->ipv4.rt_secret_timer.expires = time; | ||
| 3086 | } else | ||
| 3087 | net->ipv4.rt_secret_timer.expires = new; | ||
| 3088 | |||
| 3089 | net->ipv4.rt_secret_timer.expires += jiffies; | ||
| 3090 | add_timer(&net->ipv4.rt_secret_timer); | ||
| 3091 | } | ||
| 3092 | rtnl_unlock(); | ||
| 3093 | } | ||
| 3094 | |||
| 3095 | static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, | ||
| 3096 | void __user *buffer, size_t *lenp, | ||
| 3097 | loff_t *ppos) | ||
| 3098 | { | ||
| 3099 | int old = ip_rt_secret_interval; | ||
| 3100 | int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); | ||
| 3101 | |||
| 3102 | rt_secret_reschedule(old); | ||
| 3103 | |||
| 3104 | return ret; | ||
| 3105 | } | ||
| 3106 | |||
| 3107 | static ctl_table ipv4_route_table[] = { | 3098 | static ctl_table ipv4_route_table[] = { |
| 3108 | { | 3099 | { |
| 3109 | .procname = "gc_thresh", | 3100 | .procname = "gc_thresh", |
| @@ -3212,13 +3203,6 @@ static ctl_table ipv4_route_table[] = { | |||
| 3212 | .mode = 0644, | 3203 | .mode = 0644, |
| 3213 | .proc_handler = proc_dointvec, | 3204 | .proc_handler = proc_dointvec, |
| 3214 | }, | 3205 | }, |
| 3215 | { | ||
| 3216 | .procname = "secret_interval", | ||
| 3217 | .data = &ip_rt_secret_interval, | ||
| 3218 | .maxlen = sizeof(int), | ||
| 3219 | .mode = 0644, | ||
| 3220 | .proc_handler = ipv4_sysctl_rt_secret_interval, | ||
| 3221 | }, | ||
| 3222 | { } | 3206 | { } |
| 3223 | }; | 3207 | }; |
| 3224 | 3208 | ||
| @@ -3297,39 +3281,20 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { | |||
| 3297 | }; | 3281 | }; |
| 3298 | #endif | 3282 | #endif |
| 3299 | 3283 | ||
| 3300 | 3284 | static __net_init int rt_genid_init(struct net *net) | |
| 3301 | static __net_init int rt_secret_timer_init(struct net *net) | ||
| 3302 | { | 3285 | { |
| 3303 | atomic_set(&net->ipv4.rt_genid, | 3286 | get_random_bytes(&net->ipv4.rt_genid, |
| 3304 | (int) ((num_physpages ^ (num_physpages>>8)) ^ | 3287 | sizeof(net->ipv4.rt_genid)); |
| 3305 | (jiffies ^ (jiffies >> 7)))); | ||
| 3306 | |||
| 3307 | net->ipv4.rt_secret_timer.function = rt_secret_rebuild; | ||
| 3308 | net->ipv4.rt_secret_timer.data = (unsigned long)net; | ||
| 3309 | init_timer_deferrable(&net->ipv4.rt_secret_timer); | ||
| 3310 | |||
| 3311 | if (ip_rt_secret_interval) { | ||
| 3312 | net->ipv4.rt_secret_timer.expires = | ||
| 3313 | jiffies + net_random() % ip_rt_secret_interval + | ||
| 3314 | ip_rt_secret_interval; | ||
| 3315 | add_timer(&net->ipv4.rt_secret_timer); | ||
| 3316 | } | ||
| 3317 | return 0; | 3288 | return 0; |
| 3318 | } | 3289 | } |
| 3319 | 3290 | ||
| 3320 | static __net_exit void rt_secret_timer_exit(struct net *net) | 3291 | static __net_initdata struct pernet_operations rt_genid_ops = { |
| 3321 | { | 3292 | .init = rt_genid_init, |
| 3322 | del_timer_sync(&net->ipv4.rt_secret_timer); | ||
| 3323 | } | ||
| 3324 | |||
| 3325 | static __net_initdata struct pernet_operations rt_secret_timer_ops = { | ||
| 3326 | .init = rt_secret_timer_init, | ||
| 3327 | .exit = rt_secret_timer_exit, | ||
| 3328 | }; | 3293 | }; |
| 3329 | 3294 | ||
| 3330 | 3295 | ||
| 3331 | #ifdef CONFIG_NET_CLS_ROUTE | 3296 | #ifdef CONFIG_NET_CLS_ROUTE |
| 3332 | struct ip_rt_acct *ip_rt_acct __read_mostly; | 3297 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
| 3333 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3298 | #endif /* CONFIG_NET_CLS_ROUTE */ |
| 3334 | 3299 | ||
| 3335 | static __initdata unsigned long rhash_entries; | 3300 | static __initdata unsigned long rhash_entries; |
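With the secret-interval timer gone, the per-namespace state shrinks to a single generation counter seeded once from the random pool; invalidating the cache is then just a matter of bumping rt_genid so the rt_is_expired() test rejects every existing entry. The registration boilerplate follows the usual pernet pattern — sketched below with hypothetical names; note that rt_genid_ops above needs no .exit hook because there is no timer left to tear down:

static __net_init int example_net_init(struct net *net)
{
	/* seed or allocate per-namespace state here */
	return 0;
}

static __net_exit void example_net_exit(struct net *net)
{
	/* tear down whatever example_net_init() created */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

/* at boot/module init time:
 *	register_pernet_subsys(&example_net_ops);
 */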
| @@ -3385,9 +3350,6 @@ int __init ip_rt_init(void) | |||
| 3385 | schedule_delayed_work(&expires_work, | 3350 | schedule_delayed_work(&expires_work, |
| 3386 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | 3351 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); |
| 3387 | 3352 | ||
| 3388 | if (register_pernet_subsys(&rt_secret_timer_ops)) | ||
| 3389 | printk(KERN_ERR "Unable to setup rt_secret_timer\n"); | ||
| 3390 | |||
| 3391 | if (ip_rt_proc_init()) | 3353 | if (ip_rt_proc_init()) |
| 3392 | printk(KERN_ERR "Unable to create route proc files\n"); | 3354 | printk(KERN_ERR "Unable to create route proc files\n"); |
| 3393 | #ifdef CONFIG_XFRM | 3355 | #ifdef CONFIG_XFRM |
| @@ -3399,6 +3361,7 @@ int __init ip_rt_init(void) | |||
| 3399 | #ifdef CONFIG_SYSCTL | 3361 | #ifdef CONFIG_SYSCTL |
| 3400 | register_pernet_subsys(&sysctl_route_ops); | 3362 | register_pernet_subsys(&sysctl_route_ops); |
| 3401 | #endif | 3363 | #endif |
| 3364 | register_pernet_subsys(&rt_genid_ops); | ||
| 3402 | return rc; | 3365 | return rc; |
| 3403 | } | 3366 | } |
| 3404 | 3367 | ||
| @@ -3412,7 +3375,3 @@ void __init ip_static_sysctl_init(void) | |||
| 3412 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3375 | register_sysctl_paths(ipv4_path, ipv4_skeleton); |
| 3413 | } | 3376 | } |
| 3414 | #endif | 3377 | #endif |
| 3415 | |||
| 3416 | EXPORT_SYMBOL(__ip_select_ident); | ||
| 3417 | EXPORT_SYMBOL(ip_route_input); | ||
| 3418 | EXPORT_SYMBOL(ip_route_output_key); | ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 66fd80ef2473..650cace2180d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | #include <net/tcp.h> | 18 | #include <net/tcp.h> |
| 19 | #include <net/route.h> | 19 | #include <net/route.h> |
| 20 | 20 | ||
| 21 | /* Timestamps: lowest 9 bits store TCP options */ | 21 | /* Timestamps: lowest bits store TCP options */ |
| 22 | #define TSBITS 9 | 22 | #define TSBITS 6 |
| 23 | #define TSMASK (((__u32)1 << TSBITS) - 1) | 23 | #define TSMASK (((__u32)1 << TSBITS) - 1) |
| 24 | 24 | ||
| 25 | extern int sysctl_tcp_syncookies; | 25 | extern int sysctl_tcp_syncookies; |
| @@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | |||
| 58 | 58 | ||
| 59 | /* | 59 | /* |
| 60 | * when syncookies are in effect and tcp timestamps are enabled we encode | 60 | * when syncookies are in effect and tcp timestamps are enabled we encode |
| 61 | * tcp options in the lowest 9 bits of the timestamp value that will be | 61 | * tcp options in the lower bits of the timestamp value that will be |
| 62 | * sent in the syn-ack. | 62 | * sent in the syn-ack. |
| 63 | * Since subsequent timestamps use the normal tcp_time_stamp value, we | 63 | * Since subsequent timestamps use the normal tcp_time_stamp value, we |
| 64 | * must make sure that the resulting initial timestamp is <= tcp_time_stamp. | 64 | * must make sure that the resulting initial timestamp is <= tcp_time_stamp. |
| @@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req) | |||
| 70 | u32 options = 0; | 70 | u32 options = 0; |
| 71 | 71 | ||
| 72 | ireq = inet_rsk(req); | 72 | ireq = inet_rsk(req); |
| 73 | if (ireq->wscale_ok) { | 73 | |
| 74 | options = ireq->snd_wscale; | 74 | options = ireq->wscale_ok ? ireq->snd_wscale : 0xf; |
| 75 | options |= ireq->rcv_wscale << 4; | 75 | options |= ireq->sack_ok << 4; |
| 76 | } | 76 | options |= ireq->ecn_ok << 5; |
| 77 | options |= ireq->sack_ok << 8; | ||
| 78 | 77 | ||
| 79 | ts = ts_now & ~TSMASK; | 78 | ts = ts_now & ~TSMASK; |
| 80 | ts |= options; | 79 | ts |= options; |
| @@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, | |||
| 138 | } | 137 | } |
| 139 | 138 | ||
| 140 | /* | 139 | /* |
| 141 | * This table has to be sorted and terminated with (__u16)-1. | 140 | * MSS values are taken from the 2009 paper |
| 142 | * XXX generate a better table. | 141 | * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: |
| 143 | * Unresolved Issues: HIPPI with a 64k MSS is not well supported. | 142 | * - values 1440 to 1460 accounted for 80% of observed mss values |
| 143 | * - values outside the 536-1460 range are rare (<0.2%). | ||
| 144 | * | ||
| 145 | * Table must be sorted. | ||
| 144 | */ | 146 | */ |
| 145 | static __u16 const msstab[] = { | 147 | static __u16 const msstab[] = { |
| 146 | 64 - 1, | 148 | 64, |
| 147 | 256 - 1, | 149 | 512, |
| 148 | 512 - 1, | 150 | 536, |
| 149 | 536 - 1, | 151 | 1024, |
| 150 | 1024 - 1, | 152 | 1440, |
| 151 | 1440 - 1, | 153 | 1460, |
| 152 | 1460 - 1, | 154 | 4312, |
| 153 | 4312 - 1, | 155 | 8960, |
| 154 | (__u16)-1 | ||
| 155 | }; | 156 | }; |
| 156 | /* The number doesn't include the -1 terminator */ | ||
| 157 | #define NUM_MSS (ARRAY_SIZE(msstab) - 1) | ||
| 158 | 157 | ||
| 159 | /* | 158 | /* |
| 160 | * Generate a syncookie. mssp points to the mss, which is returned | 159 | * Generate a syncookie. mssp points to the mss, which is returned |
| @@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) | |||
| 169 | 168 | ||
| 170 | tcp_synq_overflow(sk); | 169 | tcp_synq_overflow(sk); |
| 171 | 170 | ||
| 172 | /* XXX sort msstab[] by probability? Binary search? */ | 171 | for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) |
| 173 | for (mssind = 0; mss > msstab[mssind + 1]; mssind++) | 172 | if (mss >= msstab[mssind]) |
| 174 | ; | 173 | break; |
| 175 | *mssp = msstab[mssind] + 1; | 174 | *mssp = msstab[mssind]; |
| 176 | 175 | ||
| 177 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); | 176 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); |
| 178 | 177 | ||
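The new lookup walks the table from the top and stops at the first (i.e. largest) entry that does not exceed the client's advertised MSS, so only a 3-bit index ends up in the cookie. A self-contained rendering of the same search in plain C, for illustration only:

#include <stdio.h>

static const unsigned short msstab[] = {
	64, 512, 536, 1024, 1440, 1460, 4312, 8960
};

/* Return the index of the largest entry <= mss (index 0 is the
 * fallback for anything smaller than 512). */
static unsigned int mss_to_index(unsigned int mss)
{
	unsigned int i;

	for (i = sizeof(msstab) / sizeof(msstab[0]) - 1; i; i--)
		if (mss >= msstab[i])
			break;
	return i;
}

int main(void)
{
	printf("%u -> %u\n", 1452, msstab[mss_to_index(1452)]);
	/* prints "1452 -> 1440": the MSS is clamped down, never up */
	return 0;
}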
| @@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) | |||
| 202 | jiffies / (HZ * 60), | 201 | jiffies / (HZ * 60), |
| 203 | COUNTER_TRIES); | 202 | COUNTER_TRIES); |
| 204 | 203 | ||
| 205 | return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; | 204 | return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; |
| 206 | } | 205 | } |
| 207 | 206 | ||
| 208 | static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | 207 | static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, |
| @@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | |||
| 227 | * additional tcp options in the timestamp. | 226 | * additional tcp options in the timestamp. |
| 228 | * This extracts these options from the timestamp echo. | 227 | * This extracts these options from the timestamp echo. |
| 229 | * | 228 | * |
| 230 | * The lowest 4 bits are for snd_wscale | 229 | * The lowest 4 bits store snd_wscale. |
| 231 | * The next 4 lsb are for rcv_wscale | 230 | * The next 2 bits indicate SACK and ECN support. |
| 232 | * The next lsb is for sack_ok | 231 | * |
| 232 | * Return false if we decode an option that should not be present. | ||
| 233 | */ | 233 | */ |
| 234 | void cookie_check_timestamp(struct tcp_options_received *tcp_opt) | 234 | bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) |
| 235 | { | 235 | { |
| 236 | /* echoed timestamp, 9 lowest bits contain options */ | 236 | /* echoed timestamp, lowest bits contain options */ |
| 237 | u32 options = tcp_opt->rcv_tsecr & TSMASK; | 237 | u32 options = tcp_opt->rcv_tsecr & TSMASK; |
| 238 | 238 | ||
| 239 | tcp_opt->snd_wscale = options & 0xf; | 239 | if (!tcp_opt->saw_tstamp) { |
| 240 | options >>= 4; | 240 | tcp_clear_options(tcp_opt); |
| 241 | tcp_opt->rcv_wscale = options & 0xf; | 241 | return true; |
| 242 | } | ||
| 243 | |||
| 244 | if (!sysctl_tcp_timestamps) | ||
| 245 | return false; | ||
| 242 | 246 | ||
| 243 | tcp_opt->sack_ok = (options >> 4) & 0x1; | 247 | tcp_opt->sack_ok = (options >> 4) & 0x1; |
| 248 | *ecn_ok = (options >> 5) & 1; | ||
| 249 | if (*ecn_ok && !sysctl_tcp_ecn) | ||
| 250 | return false; | ||
| 251 | |||
| 252 | if (tcp_opt->sack_ok && !sysctl_tcp_sack) | ||
| 253 | return false; | ||
| 244 | 254 | ||
| 245 | if (tcp_opt->sack_ok) | 255 | if ((options & 0xf) == 0xf) |
| 246 | tcp_sack_reset(tcp_opt); | 256 | return true; /* no window scaling */ |
| 247 | 257 | ||
| 248 | if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) | 258 | tcp_opt->wscale_ok = 1; |
| 249 | tcp_opt->wscale_ok = 1; | 259 | tcp_opt->snd_wscale = options & 0xf; |
| 260 | return sysctl_tcp_window_scaling != 0; | ||
| 250 | } | 261 | } |
| 251 | EXPORT_SYMBOL(cookie_check_timestamp); | 262 | EXPORT_SYMBOL(cookie_check_timestamp); |
| 252 | 263 | ||
| @@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 265 | int mss; | 276 | int mss; |
| 266 | struct rtable *rt; | 277 | struct rtable *rt; |
| 267 | __u8 rcv_wscale; | 278 | __u8 rcv_wscale; |
| 279 | bool ecn_ok; | ||
| 268 | 280 | ||
| 269 | if (!sysctl_tcp_syncookies || !th->ack) | 281 | if (!sysctl_tcp_syncookies || !th->ack || th->rst) |
| 270 | goto out; | 282 | goto out; |
| 271 | 283 | ||
| 272 | if (tcp_synq_no_recent_overflow(sk) || | 284 | if (tcp_synq_no_recent_overflow(sk) || |
| @@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 281 | memset(&tcp_opt, 0, sizeof(tcp_opt)); | 293 | memset(&tcp_opt, 0, sizeof(tcp_opt)); |
| 282 | tcp_parse_options(skb, &tcp_opt, &hash_location, 0); | 294 | tcp_parse_options(skb, &tcp_opt, &hash_location, 0); |
| 283 | 295 | ||
| 284 | if (tcp_opt.saw_tstamp) | 296 | if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) |
| 285 | cookie_check_timestamp(&tcp_opt); | 297 | goto out; |
| 286 | 298 | ||
| 287 | ret = NULL; | 299 | ret = NULL; |
| 288 | req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ | 300 | req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ |
| @@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 298 | ireq->rmt_port = th->source; | 310 | ireq->rmt_port = th->source; |
| 299 | ireq->loc_addr = ip_hdr(skb)->daddr; | 311 | ireq->loc_addr = ip_hdr(skb)->daddr; |
| 300 | ireq->rmt_addr = ip_hdr(skb)->saddr; | 312 | ireq->rmt_addr = ip_hdr(skb)->saddr; |
| 301 | ireq->ecn_ok = 0; | 313 | ireq->ecn_ok = ecn_ok; |
| 302 | ireq->snd_wscale = tcp_opt.snd_wscale; | 314 | ireq->snd_wscale = tcp_opt.snd_wscale; |
| 303 | ireq->rcv_wscale = tcp_opt.rcv_wscale; | ||
| 304 | ireq->sack_ok = tcp_opt.sack_ok; | 315 | ireq->sack_ok = tcp_opt.sack_ok; |
| 305 | ireq->wscale_ok = tcp_opt.wscale_ok; | 316 | ireq->wscale_ok = tcp_opt.wscale_ok; |
| 306 | ireq->tstamp_ok = tcp_opt.saw_tstamp; | 317 | ireq->tstamp_ok = tcp_opt.saw_tstamp; |
| @@ -347,21 +358,22 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 347 | { .sport = th->dest, | 358 | { .sport = th->dest, |
| 348 | .dport = th->source } } }; | 359 | .dport = th->source } } }; |
| 349 | security_req_classify_flow(req, &fl); | 360 | security_req_classify_flow(req, &fl); |
| 350 | if (ip_route_output_key(&init_net, &rt, &fl)) { | 361 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) { |
| 351 | reqsk_free(req); | 362 | reqsk_free(req); |
| 352 | goto out; | 363 | goto out; |
| 353 | } | 364 | } |
| 354 | } | 365 | } |
| 355 | 366 | ||
| 356 | /* Try to redo what tcp_v4_send_synack did. */ | 367 | /* Try to redo what tcp_v4_send_synack did. */ |
| 357 | req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); | 368 | req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); |
| 358 | 369 | ||
| 359 | tcp_select_initial_window(tcp_full_space(sk), req->mss, | 370 | tcp_select_initial_window(tcp_full_space(sk), req->mss, |
| 360 | &req->rcv_wnd, &req->window_clamp, | 371 | &req->rcv_wnd, &req->window_clamp, |
| 361 | ireq->wscale_ok, &rcv_wscale); | 372 | ireq->wscale_ok, &rcv_wscale, |
| 373 | dst_metric(&rt->dst, RTAX_INITRWND)); | ||
| 362 | 374 | ||
| 363 | ireq->rcv_wscale = rcv_wscale; | 375 | ireq->rcv_wscale = rcv_wscale; |
| 364 | 376 | ||
| 365 | ret = get_cookie_sock(sk, skb, req, &rt->u.dst); | 377 | ret = get_cookie_sock(sk, skb, req, &rt->dst); |
| 366 | out: return ret; | 378 | out: return ret; |
| 367 | } | 379 | } |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce3994..d96c1da4b17c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/inetdevice.h> | 12 | #include <linux/inetdevice.h> |
| 13 | #include <linux/seqlock.h> | 13 | #include <linux/seqlock.h> |
| 14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| 15 | #include <linux/slab.h> | ||
| 15 | #include <net/snmp.h> | 16 | #include <net/snmp.h> |
| 16 | #include <net/icmp.h> | 17 | #include <net/icmp.h> |
| 17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
| @@ -298,6 +299,13 @@ static struct ctl_table ipv4_table[] = { | |||
| 298 | .mode = 0644, | 299 | .mode = 0644, |
| 299 | .proc_handler = ipv4_local_port_range, | 300 | .proc_handler = ipv4_local_port_range, |
| 300 | }, | 301 | }, |
| 302 | { | ||
| 303 | .procname = "ip_local_reserved_ports", | ||
| 304 | .data = NULL, /* initialized in sysctl_ipv4_init */ | ||
| 305 | .maxlen = 65536, | ||
| 306 | .mode = 0644, | ||
| 307 | .proc_handler = proc_do_large_bitmap, | ||
| 308 | }, | ||
| 301 | #ifdef CONFIG_IP_MULTICAST | 309 | #ifdef CONFIG_IP_MULTICAST |
| 302 | { | 310 | { |
| 303 | .procname = "igmp_max_memberships", | 311 | .procname = "igmp_max_memberships", |
| @@ -576,6 +584,20 @@ static struct ctl_table ipv4_table[] = { | |||
| 576 | .proc_handler = proc_dointvec | 584 | .proc_handler = proc_dointvec |
| 577 | }, | 585 | }, |
| 578 | { | 586 | { |
| 587 | .procname = "tcp_thin_linear_timeouts", | ||
| 588 | .data = &sysctl_tcp_thin_linear_timeouts, | ||
| 589 | .maxlen = sizeof(int), | ||
| 590 | .mode = 0644, | ||
| 591 | .proc_handler = proc_dointvec | ||
| 592 | }, | ||
| 593 | { | ||
| 594 | .procname = "tcp_thin_dupack", | ||
| 595 | .data = &sysctl_tcp_thin_dupack, | ||
| 596 | .maxlen = sizeof(int), | ||
| 597 | .mode = 0644, | ||
| 598 | .proc_handler = proc_dointvec | ||
| 599 | }, | ||
| 600 | { | ||
| 579 | .procname = "udp_mem", | 601 | .procname = "udp_mem", |
| 580 | .data = &sysctl_udp_mem, | 602 | .data = &sysctl_udp_mem, |
| 581 | .maxlen = sizeof(sysctl_udp_mem), | 603 | .maxlen = sizeof(sysctl_udp_mem), |
| @@ -721,6 +743,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { | |||
| 721 | static __init int sysctl_ipv4_init(void) | 743 | static __init int sysctl_ipv4_init(void) |
| 722 | { | 744 | { |
| 723 | struct ctl_table_header *hdr; | 745 | struct ctl_table_header *hdr; |
| 746 | struct ctl_table *i; | ||
| 747 | |||
| 748 | for (i = ipv4_table; i->procname; i++) { | ||
| 749 | if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { | ||
| 750 | i->data = sysctl_local_reserved_ports; | ||
| 751 | break; | ||
| 752 | } | ||
| 753 | } | ||
| 754 | if (!i->procname) | ||
| 755 | return -EINVAL; | ||
| 724 | 756 | ||
| 725 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); | 757 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); |
| 726 | if (hdr == NULL) | 758 | if (hdr == NULL) |
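proc_do_large_bitmap exposes the 65536-bit reservation map as a comma-separated list of ports and port ranges, so keeping the autobind logic away from ports an application will claim later is a one-liner (format as documented for this sysctl):

echo "8080,49152-49172" > /proc/sys/net/ipv4/ip_local_reserved_ports
cat /proc/sys/net/ipv4/ip_local_reserved_ports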
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b0a26bb25e2e..f115ea68a4ef 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -265,6 +265,7 @@ | |||
| 265 | #include <linux/err.h> | 265 | #include <linux/err.h> |
| 266 | #include <linux/crypto.h> | 266 | #include <linux/crypto.h> |
| 267 | #include <linux/time.h> | 267 | #include <linux/time.h> |
| 268 | #include <linux/slab.h> | ||
| 268 | 269 | ||
| 269 | #include <net/icmp.h> | 270 | #include <net/icmp.h> |
| 270 | #include <net/tcp.h> | 271 | #include <net/tcp.h> |
| @@ -314,7 +315,6 @@ struct tcp_splice_state { | |||
| 314 | * is strict, actions are advisory and have some latency. | 315 | * is strict, actions are advisory and have some latency. |
| 315 | */ | 316 | */ |
| 316 | int tcp_memory_pressure __read_mostly; | 317 | int tcp_memory_pressure __read_mostly; |
| 317 | |||
| 318 | EXPORT_SYMBOL(tcp_memory_pressure); | 318 | EXPORT_SYMBOL(tcp_memory_pressure); |
| 319 | 319 | ||
| 320 | void tcp_enter_memory_pressure(struct sock *sk) | 320 | void tcp_enter_memory_pressure(struct sock *sk) |
| @@ -324,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk) | |||
| 324 | tcp_memory_pressure = 1; | 324 | tcp_memory_pressure = 1; |
| 325 | } | 325 | } |
| 326 | } | 326 | } |
| 327 | |||
| 328 | EXPORT_SYMBOL(tcp_enter_memory_pressure); | 327 | EXPORT_SYMBOL(tcp_enter_memory_pressure); |
| 329 | 328 | ||
| 330 | /* Convert seconds to retransmits based on initial and max timeout */ | 329 | /* Convert seconds to retransmits based on initial and max timeout */ |
| @@ -377,7 +376,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 377 | struct sock *sk = sock->sk; | 376 | struct sock *sk = sock->sk; |
| 378 | struct tcp_sock *tp = tcp_sk(sk); | 377 | struct tcp_sock *tp = tcp_sk(sk); |
| 379 | 378 | ||
| 380 | sock_poll_wait(file, sk->sk_sleep, wait); | 379 | sock_poll_wait(file, sk_sleep(sk), wait); |
| 381 | if (sk->sk_state == TCP_LISTEN) | 380 | if (sk->sk_state == TCP_LISTEN) |
| 382 | return inet_csk_listen_poll(sk); | 381 | return inet_csk_listen_poll(sk); |
| 383 | 382 | ||
| @@ -387,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 387 | */ | 386 | */ |
| 388 | 387 | ||
| 389 | mask = 0; | 388 | mask = 0; |
| 390 | if (sk->sk_err) | ||
| 391 | mask = POLLERR; | ||
| 392 | 389 | ||
| 393 | /* | 390 | /* |
| 394 | * POLLHUP is certainly not done right. But poll() doesn't | 391 | * POLLHUP is certainly not done right. But poll() doesn't |
| @@ -429,7 +426,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 429 | if (tp->urg_seq == tp->copied_seq && | 426 | if (tp->urg_seq == tp->copied_seq && |
| 430 | !sock_flag(sk, SOCK_URGINLINE) && | 427 | !sock_flag(sk, SOCK_URGINLINE) && |
| 431 | tp->urg_data) | 428 | tp->urg_data) |
| 432 | target--; | 429 | target++; |
| 433 | 430 | ||
| 434 | /* Potential race condition. If read of tp below will | 431 | /* Potential race condition. If read of tp below will |
| 435 | * escape above sk->sk_state, we can be illegally awaken | 432 | * escape above sk->sk_state, we can be illegally awaken |
| @@ -452,13 +449,20 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 452 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) | 449 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) |
| 453 | mask |= POLLOUT | POLLWRNORM; | 450 | mask |= POLLOUT | POLLWRNORM; |
| 454 | } | 451 | } |
| 455 | } | 452 | } else |
| 453 | mask |= POLLOUT | POLLWRNORM; | ||
| 456 | 454 | ||
| 457 | if (tp->urg_data & TCP_URG_VALID) | 455 | if (tp->urg_data & TCP_URG_VALID) |
| 458 | mask |= POLLPRI; | 456 | mask |= POLLPRI; |
| 459 | } | 457 | } |
| 458 | /* This barrier is coupled with smp_wmb() in tcp_reset() */ | ||
| 459 | smp_rmb(); | ||
| 460 | if (sk->sk_err) | ||
| 461 | mask |= POLLERR; | ||
| 462 | |||
| 460 | return mask; | 463 | return mask; |
| 461 | } | 464 | } |
| 465 | EXPORT_SYMBOL(tcp_poll); | ||
| 462 | 466 | ||
| 463 | int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | 467 | int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
| 464 | { | 468 | { |
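Two related fixes land in this hunk: writability is now reported even before the connection is fully established (the new else branch), and the sk_err test moves behind an smp_rmb() paired with an smp_wmb() in tcp_reset(). The pairing follows the usual publish/observe shape — schematic only, the real writer sequence lives in tcp_reset():

/*
 *   writer (tcp_reset)            reader (tcp_poll)
 *   ------------------            -----------------
 *   sk->sk_err = ECONNRESET;      mask = ...;   (queue/state reads)
 *   smp_wmb();                    smp_rmb();
 *   <report / state change>       if (sk->sk_err)
 *                                         mask |= POLLERR;
 *
 * If the reader's earlier loads observed any post-barrier effect of
 * the reset, the wmb/rmb pair guarantees the sk_err load sees the
 * error too, so poll() cannot report post-reset state without POLLERR.
 */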
| @@ -507,10 +511,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
| 507 | 511 | ||
| 508 | return put_user(answ, (int __user *)arg); | 512 | return put_user(answ, (int __user *)arg); |
| 509 | } | 513 | } |
| 514 | EXPORT_SYMBOL(tcp_ioctl); | ||
| 510 | 515 | ||
| 511 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | 516 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) |
| 512 | { | 517 | { |
| 513 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 518 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
| 514 | tp->pushed_seq = tp->write_seq; | 519 | tp->pushed_seq = tp->write_seq; |
| 515 | } | 520 | } |
| 516 | 521 | ||
| @@ -526,7 +531,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
| 526 | 531 | ||
| 527 | skb->csum = 0; | 532 | skb->csum = 0; |
| 528 | tcb->seq = tcb->end_seq = tp->write_seq; | 533 | tcb->seq = tcb->end_seq = tp->write_seq; |
| 529 | tcb->flags = TCPCB_FLAG_ACK; | 534 | tcb->flags = TCPHDR_ACK; |
| 530 | tcb->sacked = 0; | 535 | tcb->sacked = 0; |
| 531 | skb_header_release(skb); | 536 | skb_header_release(skb); |
| 532 | tcp_add_write_queue_tail(sk, skb); | 537 | tcp_add_write_queue_tail(sk, skb); |
| @@ -536,8 +541,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
| 536 | tp->nonagle &= ~TCP_NAGLE_PUSH; | 541 | tp->nonagle &= ~TCP_NAGLE_PUSH; |
| 537 | } | 542 | } |
| 538 | 543 | ||
| 539 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | 544 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) |
| 540 | struct sk_buff *skb) | ||
| 541 | { | 545 | { |
| 542 | if (flags & MSG_OOB) | 546 | if (flags & MSG_OOB) |
| 543 | tp->snd_up = tp->write_seq; | 547 | tp->snd_up = tp->write_seq; |
| @@ -546,13 +550,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | |||
| 546 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, | 550 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, |
| 547 | int nonagle) | 551 | int nonagle) |
| 548 | { | 552 | { |
| 549 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 550 | |||
| 551 | if (tcp_send_head(sk)) { | 553 | if (tcp_send_head(sk)) { |
| 552 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 554 | struct tcp_sock *tp = tcp_sk(sk); |
| 555 | |||
| 553 | if (!(flags & MSG_MORE) || forced_push(tp)) | 556 | if (!(flags & MSG_MORE) || forced_push(tp)) |
| 554 | tcp_mark_push(tp, skb); | 557 | tcp_mark_push(tp, tcp_write_queue_tail(sk)); |
| 555 | tcp_mark_urg(tp, flags, skb); | 558 | |
| 559 | tcp_mark_urg(tp, flags); | ||
| 556 | __tcp_push_pending_frames(sk, mss_now, | 560 | __tcp_push_pending_frames(sk, mss_now, |
| 557 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); | 561 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); |
| 558 | } | 562 | } |
| @@ -608,6 +612,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, | |||
| 608 | ssize_t spliced; | 612 | ssize_t spliced; |
| 609 | int ret; | 613 | int ret; |
| 610 | 614 | ||
| 615 | sock_rps_record_flow(sk); | ||
| 611 | /* | 616 | /* |
| 612 | * We can't seek on a socket input | 617 | * We can't seek on a socket input |
| 613 | */ | 618 | */ |
| @@ -675,6 +680,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, | |||
| 675 | 680 | ||
| 676 | return ret; | 681 | return ret; |
| 677 | } | 682 | } |
| 683 | EXPORT_SYMBOL(tcp_splice_read); | ||
| 678 | 684 | ||
| 679 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | 685 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) |
| 680 | { | 686 | { |
| @@ -815,7 +821,7 @@ new_segment: | |||
| 815 | skb_shinfo(skb)->gso_segs = 0; | 821 | skb_shinfo(skb)->gso_segs = 0; |
| 816 | 822 | ||
| 817 | if (!copied) | 823 | if (!copied) |
| 818 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; | 824 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
| 819 | 825 | ||
| 820 | copied += copy; | 826 | copied += copy; |
| 821 | poffset += copy; | 827 | poffset += copy; |
| @@ -856,15 +862,15 @@ out_err: | |||
| 856 | return sk_stream_error(sk, flags, err); | 862 | return sk_stream_error(sk, flags, err); |
| 857 | } | 863 | } |
| 858 | 864 | ||
| 859 | ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | 865 | int tcp_sendpage(struct sock *sk, struct page *page, int offset, |
| 860 | size_t size, int flags) | 866 | size_t size, int flags) |
| 861 | { | 867 | { |
| 862 | ssize_t res; | 868 | ssize_t res; |
| 863 | struct sock *sk = sock->sk; | ||
| 864 | 869 | ||
| 865 | if (!(sk->sk_route_caps & NETIF_F_SG) || | 870 | if (!(sk->sk_route_caps & NETIF_F_SG) || |
| 866 | !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) | 871 | !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) |
| 867 | return sock_no_sendpage(sock, page, offset, size, flags); | 872 | return sock_no_sendpage(sk->sk_socket, page, offset, size, |
| 873 | flags); | ||
| 868 | 874 | ||
| 869 | lock_sock(sk); | 875 | lock_sock(sk); |
| 870 | TCP_CHECK_TIMER(sk); | 876 | TCP_CHECK_TIMER(sk); |
| @@ -873,16 +879,17 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | |||
| 873 | release_sock(sk); | 879 | release_sock(sk); |
| 874 | return res; | 880 | return res; |
| 875 | } | 881 | } |
| 882 | EXPORT_SYMBOL(tcp_sendpage); | ||
| 876 | 883 | ||
| 877 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 884 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) |
| 878 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | 885 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) |
| 879 | 886 | ||
| 880 | static inline int select_size(struct sock *sk) | 887 | static inline int select_size(struct sock *sk, int sg) |
| 881 | { | 888 | { |
| 882 | struct tcp_sock *tp = tcp_sk(sk); | 889 | struct tcp_sock *tp = tcp_sk(sk); |
| 883 | int tmp = tp->mss_cache; | 890 | int tmp = tp->mss_cache; |
| 884 | 891 | ||
| 885 | if (sk->sk_route_caps & NETIF_F_SG) { | 892 | if (sg) { |
| 886 | if (sk_can_gso(sk)) | 893 | if (sk_can_gso(sk)) |
| 887 | tmp = 0; | 894 | tmp = 0; |
| 888 | else { | 895 | else { |
| @@ -897,16 +904,15 @@ static inline int select_size(struct sock *sk) | |||
| 897 | return tmp; | 904 | return tmp; |
| 898 | } | 905 | } |
| 899 | 906 | ||
| 900 | int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | 907 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
| 901 | size_t size) | 908 | size_t size) |
| 902 | { | 909 | { |
| 903 | struct sock *sk = sock->sk; | ||
| 904 | struct iovec *iov; | 910 | struct iovec *iov; |
| 905 | struct tcp_sock *tp = tcp_sk(sk); | 911 | struct tcp_sock *tp = tcp_sk(sk); |
| 906 | struct sk_buff *skb; | 912 | struct sk_buff *skb; |
| 907 | int iovlen, flags; | 913 | int iovlen, flags; |
| 908 | int mss_now, size_goal; | 914 | int mss_now, size_goal; |
| 909 | int err, copied; | 915 | int sg, err, copied; |
| 910 | long timeo; | 916 | long timeo; |
| 911 | 917 | ||
| 912 | lock_sock(sk); | 918 | lock_sock(sk); |
| @@ -934,8 +940,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
| 934 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 940 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
| 935 | goto out_err; | 941 | goto out_err; |
| 936 | 942 | ||
| 943 | sg = sk->sk_route_caps & NETIF_F_SG; | ||
| 944 | |||
| 937 | while (--iovlen >= 0) { | 945 | while (--iovlen >= 0) { |
| 938 | int seglen = iov->iov_len; | 946 | size_t seglen = iov->iov_len; |
| 939 | unsigned char __user *from = iov->iov_base; | 947 | unsigned char __user *from = iov->iov_base; |
| 940 | 948 | ||
| 941 | iov++; | 949 | iov++; |
| @@ -959,8 +967,9 @@ new_segment: | |||
| 959 | if (!sk_stream_memory_free(sk)) | 967 | if (!sk_stream_memory_free(sk)) |
| 960 | goto wait_for_sndbuf; | 968 | goto wait_for_sndbuf; |
| 961 | 969 | ||
| 962 | skb = sk_stream_alloc_skb(sk, select_size(sk), | 970 | skb = sk_stream_alloc_skb(sk, |
| 963 | sk->sk_allocation); | 971 | select_size(sk, sg), |
| 972 | sk->sk_allocation); | ||
| 964 | if (!skb) | 973 | if (!skb) |
| 965 | goto wait_for_memory; | 974 | goto wait_for_memory; |
| 966 | 975 | ||
| @@ -997,9 +1006,7 @@ new_segment: | |||
| 997 | /* We can extend the last page | 1006 | /* We can extend the last page |
| 998 | * fragment. */ | 1007 | * fragment. */ |
| 999 | merge = 1; | 1008 | merge = 1; |
| 1000 | } else if (i == MAX_SKB_FRAGS || | 1009 | } else if (i == MAX_SKB_FRAGS || !sg) { |
| 1001 | (!i && | ||
| 1002 | !(sk->sk_route_caps & NETIF_F_SG))) { | ||
| 1003 | /* Need to add new fragment and cannot | 1010 | /* Need to add new fragment and cannot |
| 1004 | * do this because interface is non-SG, | 1011 | * do this because interface is non-SG, |
| 1005 | * or because all the page slots are | 1012 | * or because all the page slots are |
| @@ -1060,7 +1067,7 @@ new_segment: | |||
| 1060 | } | 1067 | } |
| 1061 | 1068 | ||
| 1062 | if (!copied) | 1069 | if (!copied) |
| 1063 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; | 1070 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
| 1064 | 1071 | ||
| 1065 | tp->write_seq += copy; | 1072 | tp->write_seq += copy; |
| 1066 | TCP_SKB_CB(skb)->end_seq += copy; | 1073 | TCP_SKB_CB(skb)->end_seq += copy; |
| @@ -1120,6 +1127,7 @@ out_err: | |||
| 1120 | release_sock(sk); | 1127 | release_sock(sk); |
| 1121 | return err; | 1128 | return err; |
| 1122 | } | 1129 | } |
| 1130 | EXPORT_SYMBOL(tcp_sendmsg); | ||
| 1123 | 1131 | ||
| 1124 | /* | 1132 | /* |
| 1125 | * Handle reading urgent data. BSD has very simple semantics for | 1133 | * Handle reading urgent data. BSD has very simple semantics for |
| @@ -1254,6 +1262,39 @@ static void tcp_prequeue_process(struct sock *sk) | |||
| 1254 | tp->ucopy.memory = 0; | 1262 | tp->ucopy.memory = 0; |
| 1255 | } | 1263 | } |
| 1256 | 1264 | ||
| 1265 | #ifdef CONFIG_NET_DMA | ||
| 1266 | static void tcp_service_net_dma(struct sock *sk, bool wait) | ||
| 1267 | { | ||
| 1268 | dma_cookie_t done, used; | ||
| 1269 | dma_cookie_t last_issued; | ||
| 1270 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 1271 | |||
| 1272 | if (!tp->ucopy.dma_chan) | ||
| 1273 | return; | ||
| 1274 | |||
| 1275 | last_issued = tp->ucopy.dma_cookie; | ||
| 1276 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1277 | |||
| 1278 | do { | ||
| 1279 | if (dma_async_memcpy_complete(tp->ucopy.dma_chan, | ||
| 1280 | last_issued, &done, | ||
| 1281 | &used) == DMA_SUCCESS) { | ||
| 1282 | /* Safe to free early-copied skbs now */ | ||
| 1283 | __skb_queue_purge(&sk->sk_async_wait_queue); | ||
| 1284 | break; | ||
| 1285 | } else { | ||
| 1286 | struct sk_buff *skb; | ||
| 1287 | while ((skb = skb_peek(&sk->sk_async_wait_queue)) && | ||
| 1288 | (dma_async_is_complete(skb->dma_cookie, done, | ||
| 1289 | used) == DMA_SUCCESS)) { | ||
| 1290 | __skb_dequeue(&sk->sk_async_wait_queue); | ||
| 1291 | kfree_skb(skb); | ||
| 1292 | } | ||
| 1293 | } | ||
| 1294 | } while (wait); | ||
| 1295 | } | ||
| 1296 | #endif | ||
| 1297 | |||
| 1257 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | 1298 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) |
| 1258 | { | 1299 | { |
| 1259 | struct sk_buff *skb; | 1300 | struct sk_buff *skb; |
| @@ -1335,6 +1376,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
| 1335 | sk_eat_skb(sk, skb, 0); | 1376 | sk_eat_skb(sk, skb, 0); |
| 1336 | if (!desc->count) | 1377 | if (!desc->count) |
| 1337 | break; | 1378 | break; |
| 1379 | tp->copied_seq = seq; | ||
| 1338 | } | 1380 | } |
| 1339 | tp->copied_seq = seq; | 1381 | tp->copied_seq = seq; |
| 1340 | 1382 | ||
| @@ -1345,6 +1387,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
| 1345 | tcp_cleanup_rbuf(sk, copied); | 1387 | tcp_cleanup_rbuf(sk, copied); |
| 1346 | return copied; | 1388 | return copied; |
| 1347 | } | 1389 | } |
| 1390 | EXPORT_SYMBOL(tcp_read_sock); | ||
| 1348 | 1391 | ||
| 1349 | /* | 1392 | /* |
| 1350 | * This routine copies from a sock struct into the user buffer. | 1393 | * This routine copies from a sock struct into the user buffer. |
| @@ -1546,6 +1589,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 1546 | /* __ Set realtime policy in scheduler __ */ | 1589 | /* __ Set realtime policy in scheduler __ */ |
| 1547 | } | 1590 | } |
| 1548 | 1591 | ||
| 1592 | #ifdef CONFIG_NET_DMA | ||
| 1593 | if (tp->ucopy.dma_chan) | ||
| 1594 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1595 | #endif | ||
| 1549 | if (copied >= target) { | 1596 | if (copied >= target) { |
| 1550 | /* Do not sleep, just process backlog. */ | 1597 | /* Do not sleep, just process backlog. */ |
| 1551 | release_sock(sk); | 1598 | release_sock(sk); |
| @@ -1554,6 +1601,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 1554 | sk_wait_data(sk, &timeo); | 1601 | sk_wait_data(sk, &timeo); |
| 1555 | 1602 | ||
| 1556 | #ifdef CONFIG_NET_DMA | 1603 | #ifdef CONFIG_NET_DMA |
| 1604 | tcp_service_net_dma(sk, false); /* Don't block */ | ||
| 1557 | tp->ucopy.wakeup = 0; | 1605 | tp->ucopy.wakeup = 0; |
| 1558 | #endif | 1606 | #endif |
| 1559 | 1607 | ||
| @@ -1633,6 +1681,9 @@ do_prequeue: | |||
| 1633 | copied = -EFAULT; | 1681 | copied = -EFAULT; |
| 1634 | break; | 1682 | break; |
| 1635 | } | 1683 | } |
| 1684 | |||
| 1685 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1686 | |||
| 1636 | if ((offset + used) == skb->len) | 1687 | if ((offset + used) == skb->len) |
| 1637 | copied_early = 1; | 1688 | copied_early = 1; |
| 1638 | 1689 | ||
| @@ -1702,27 +1753,9 @@ skip_copy: | |||
| 1702 | } | 1753 | } |
| 1703 | 1754 | ||
| 1704 | #ifdef CONFIG_NET_DMA | 1755 | #ifdef CONFIG_NET_DMA |
| 1705 | if (tp->ucopy.dma_chan) { | 1756 | tcp_service_net_dma(sk, true); /* Wait for queue to drain */ |
| 1706 | dma_cookie_t done, used; | 1757 | tp->ucopy.dma_chan = NULL; |
| 1707 | |||
| 1708 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1709 | |||
| 1710 | while (dma_async_memcpy_complete(tp->ucopy.dma_chan, | ||
| 1711 | tp->ucopy.dma_cookie, &done, | ||
| 1712 | &used) == DMA_IN_PROGRESS) { | ||
| 1713 | /* do partial cleanup of sk_async_wait_queue */ | ||
| 1714 | while ((skb = skb_peek(&sk->sk_async_wait_queue)) && | ||
| 1715 | (dma_async_is_complete(skb->dma_cookie, done, | ||
| 1716 | used) == DMA_SUCCESS)) { | ||
| 1717 | __skb_dequeue(&sk->sk_async_wait_queue); | ||
| 1718 | kfree_skb(skb); | ||
| 1719 | } | ||
| 1720 | } | ||
| 1721 | 1758 | ||
| 1722 | /* Safe to free early-copied skbs now */ | ||
| 1723 | __skb_queue_purge(&sk->sk_async_wait_queue); | ||
| 1724 | tp->ucopy.dma_chan = NULL; | ||
| 1725 | } | ||
| 1726 | if (tp->ucopy.pinned_list) { | 1759 | if (tp->ucopy.pinned_list) { |
| 1727 | dma_unpin_iovec_pages(tp->ucopy.pinned_list); | 1760 | dma_unpin_iovec_pages(tp->ucopy.pinned_list); |
| 1728 | tp->ucopy.pinned_list = NULL; | 1761 | tp->ucopy.pinned_list = NULL; |
| @@ -1749,6 +1782,7 @@ recv_urg: | |||
| 1749 | err = tcp_recv_urg(sk, msg, len, flags); | 1782 | err = tcp_recv_urg(sk, msg, len, flags); |
| 1750 | goto out; | 1783 | goto out; |
| 1751 | } | 1784 | } |
| 1785 | EXPORT_SYMBOL(tcp_recvmsg); | ||
| 1752 | 1786 | ||
| 1753 | void tcp_set_state(struct sock *sk, int state) | 1787 | void tcp_set_state(struct sock *sk, int state) |
| 1754 | { | 1788 | { |
| @@ -1841,6 +1875,7 @@ void tcp_shutdown(struct sock *sk, int how) | |||
| 1841 | tcp_send_fin(sk); | 1875 | tcp_send_fin(sk); |
| 1842 | } | 1876 | } |
| 1843 | } | 1877 | } |
| 1878 | EXPORT_SYMBOL(tcp_shutdown); | ||
| 1844 | 1879 | ||
| 1845 | void tcp_close(struct sock *sk, long timeout) | 1880 | void tcp_close(struct sock *sk, long timeout) |
| 1846 | { | 1881 | { |
| @@ -1873,6 +1908,10 @@ void tcp_close(struct sock *sk, long timeout) | |||
| 1873 | 1908 | ||
| 1874 | sk_mem_reclaim(sk); | 1909 | sk_mem_reclaim(sk); |
| 1875 | 1910 | ||
| 1911 | /* If the socket has already been reset (e.g. in tcp_reset()) - kill it. */ | ||
| 1912 | if (sk->sk_state == TCP_CLOSE) | ||
| 1913 | goto adjudge_to_death; | ||
| 1914 | |||
| 1876 | /* As outlined in RFC 2525, section 2.17, we send a RST here because | 1915 | /* As outlined in RFC 2525, section 2.17, we send a RST here because |
| 1877 | * data was lost. To witness the awful effects of the old behavior of | 1916 | * data was lost. To witness the awful effects of the old behavior of |
| 1878 | * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk | 1917 | * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk |
| @@ -1976,11 +2015,8 @@ adjudge_to_death: | |||
| 1976 | } | 2015 | } |
| 1977 | } | 2016 | } |
| 1978 | if (sk->sk_state != TCP_CLOSE) { | 2017 | if (sk->sk_state != TCP_CLOSE) { |
| 1979 | int orphan_count = percpu_counter_read_positive( | ||
| 1980 | sk->sk_prot->orphan_count); | ||
| 1981 | |||
| 1982 | sk_mem_reclaim(sk); | 2018 | sk_mem_reclaim(sk); |
| 1983 | if (tcp_too_many_orphans(sk, orphan_count)) { | 2019 | if (tcp_too_many_orphans(sk, 0)) { |
| 1984 | if (net_ratelimit()) | 2020 | if (net_ratelimit()) |
| 1985 | printk(KERN_INFO "TCP: too many of orphaned " | 2021 | printk(KERN_INFO "TCP: too many of orphaned " |
| 1986 | "sockets\n"); | 2022 | "sockets\n"); |
| @@ -2000,6 +2036,7 @@ out: | |||
| 2000 | local_bh_enable(); | 2036 | local_bh_enable(); |
| 2001 | sock_put(sk); | 2037 | sock_put(sk); |
| 2002 | } | 2038 | } |
| 2039 | EXPORT_SYMBOL(tcp_close); | ||
| 2003 | 2040 | ||
| 2004 | /* These states need RST on ABORT according to RFC793 */ | 2041 | /* These states need RST on ABORT according to RFC793 */ |
| 2005 | 2042 | ||
| @@ -2073,6 +2110,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
| 2073 | sk->sk_error_report(sk); | 2110 | sk->sk_error_report(sk); |
| 2074 | return err; | 2111 | return err; |
| 2075 | } | 2112 | } |
| 2113 | EXPORT_SYMBOL(tcp_disconnect); | ||
| 2076 | 2114 | ||
| 2077 | /* | 2115 | /* |
| 2078 | * Socket option code for TCP. | 2116 | * Socket option code for TCP. |
| @@ -2150,6 +2188,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2150 | GFP_KERNEL); | 2188 | GFP_KERNEL); |
| 2151 | if (cvp == NULL) | 2189 | if (cvp == NULL) |
| 2152 | return -ENOMEM; | 2190 | return -ENOMEM; |
| 2191 | |||
| 2192 | kref_init(&cvp->kref); | ||
| 2153 | } | 2193 | } |
| 2154 | lock_sock(sk); | 2194 | lock_sock(sk); |
| 2155 | tp->rx_opt.cookie_in_always = | 2195 | tp->rx_opt.cookie_in_always = |
| @@ -2164,12 +2204,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2164 | */ | 2204 | */ |
| 2165 | kref_put(&tp->cookie_values->kref, | 2205 | kref_put(&tp->cookie_values->kref, |
| 2166 | tcp_cookie_values_release); | 2206 | tcp_cookie_values_release); |
| 2167 | kref_init(&cvp->kref); | ||
| 2168 | tp->cookie_values = cvp; | ||
| 2169 | } else { | 2207 | } else { |
| 2170 | cvp = tp->cookie_values; | 2208 | cvp = tp->cookie_values; |
| 2171 | } | 2209 | } |
| 2172 | } | 2210 | } |
| 2211 | |||
| 2173 | if (cvp != NULL) { | 2212 | if (cvp != NULL) { |
| 2174 | cvp->cookie_desired = ctd.tcpct_cookie_desired; | 2213 | cvp->cookie_desired = ctd.tcpct_cookie_desired; |
| 2175 | 2214 | ||
| @@ -2183,6 +2222,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2183 | cvp->s_data_desired = ctd.tcpct_s_data_desired; | 2222 | cvp->s_data_desired = ctd.tcpct_s_data_desired; |
| 2184 | cvp->s_data_constant = 0; /* false */ | 2223 | cvp->s_data_constant = 0; /* false */ |
| 2185 | } | 2224 | } |
| 2225 | |||
| 2226 | tp->cookie_values = cvp; | ||
| 2186 | } | 2227 | } |
| 2187 | release_sock(sk); | 2228 | release_sock(sk); |
| 2188 | return err; | 2229 | return err; |
| @@ -2190,7 +2231,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2190 | default: | 2231 | default: |
| 2191 | /* fallthru */ | 2232 | /* fallthru */ |
| 2192 | break; | 2233 | break; |
| 2193 | }; | 2234 | } |
| 2194 | 2235 | ||
| 2195 | if (optlen < sizeof(int)) | 2236 | if (optlen < sizeof(int)) |
| 2196 | return -EINVAL; | 2237 | return -EINVAL; |
| @@ -2229,6 +2270,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2229 | } | 2270 | } |
| 2230 | break; | 2271 | break; |
| 2231 | 2272 | ||
| 2273 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
| 2274 | if (val < 0 || val > 1) | ||
| 2275 | err = -EINVAL; | ||
| 2276 | else | ||
| 2277 | tp->thin_lto = val; | ||
| 2278 | break; | ||
| 2279 | |||
| 2280 | case TCP_THIN_DUPACK: | ||
| 2281 | if (val < 0 || val > 1) | ||
| 2282 | err = -EINVAL; | ||
| 2283 | else | ||
| 2284 | tp->thin_dupack = val; | ||
| 2285 | break; | ||
| 2286 | |||
| 2232 | case TCP_CORK: | 2287 | case TCP_CORK: |
| 2233 | /* When set indicates to always queue non-full frames. | 2288 | /* When set indicates to always queue non-full frames. |
| 2234 | * Later the user clears this option and we transmit | 2289 | * Later the user clears this option and we transmit |
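The two new per-socket switches mirror the tcp_thin_* sysctls added earlier in this series. A hedged userspace sketch of turning them on for a single connection — the fallback option values (16 and 17 in this kernel's headers) are provided only for older toolchains:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_THIN_LINEAR_TIMEOUTS
#define TCP_THIN_LINEAR_TIMEOUTS 16
#endif
#ifndef TCP_THIN_DUPACK
#define TCP_THIN_DUPACK 17
#endif

/* Enable both thin-stream heuristics on an existing TCP socket. */
static int enable_thin_stream(int fd)
{
	int one = 1;

	if (setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS,
		       &one, sizeof(one)) < 0)
		return -1;
	return setsockopt(fd, IPPROTO_TCP, TCP_THIN_DUPACK,
			  &one, sizeof(one));
}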
| @@ -2259,7 +2314,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2259 | if (sock_flag(sk, SOCK_KEEPOPEN) && | 2314 | if (sock_flag(sk, SOCK_KEEPOPEN) && |
| 2260 | !((1 << sk->sk_state) & | 2315 | !((1 << sk->sk_state) & |
| 2261 | (TCPF_CLOSE | TCPF_LISTEN))) { | 2316 | (TCPF_CLOSE | TCPF_LISTEN))) { |
| 2262 | __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; | 2317 | u32 elapsed = keepalive_time_elapsed(tp); |
| 2263 | if (tp->keepalive_time > elapsed) | 2318 | if (tp->keepalive_time > elapsed) |
| 2264 | elapsed = tp->keepalive_time - elapsed; | 2319 | elapsed = tp->keepalive_time - elapsed; |
| 2265 | else | 2320 | else |
| @@ -2357,6 +2412,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 2357 | optval, optlen); | 2412 | optval, optlen); |
| 2358 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); | 2413 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); |
| 2359 | } | 2414 | } |
| 2415 | EXPORT_SYMBOL(tcp_setsockopt); | ||
| 2360 | 2416 | ||
| 2361 | #ifdef CONFIG_COMPAT | 2417 | #ifdef CONFIG_COMPAT |
| 2362 | int compat_tcp_setsockopt(struct sock *sk, int level, int optname, | 2418 | int compat_tcp_setsockopt(struct sock *sk, int level, int optname, |
| @@ -2367,7 +2423,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, | |||
| 2367 | optval, optlen); | 2423 | optval, optlen); |
| 2368 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); | 2424 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); |
| 2369 | } | 2425 | } |
| 2370 | |||
| 2371 | EXPORT_SYMBOL(compat_tcp_setsockopt); | 2426 | EXPORT_SYMBOL(compat_tcp_setsockopt); |
| 2372 | #endif | 2427 | #endif |
| 2373 | 2428 | ||
| @@ -2433,7 +2488,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
| 2433 | 2488 | ||
| 2434 | info->tcpi_total_retrans = tp->total_retrans; | 2489 | info->tcpi_total_retrans = tp->total_retrans; |
| 2435 | } | 2490 | } |
| 2436 | |||
| 2437 | EXPORT_SYMBOL_GPL(tcp_get_info); | 2491 | EXPORT_SYMBOL_GPL(tcp_get_info); |
| 2438 | 2492 | ||
| 2439 | static int do_tcp_getsockopt(struct sock *sk, int level, | 2493 | static int do_tcp_getsockopt(struct sock *sk, int level, |
| @@ -2551,6 +2605,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
| 2551 | return -EFAULT; | 2605 | return -EFAULT; |
| 2552 | return 0; | 2606 | return 0; |
| 2553 | } | 2607 | } |
| 2608 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
| 2609 | val = tp->thin_lto; | ||
| 2610 | break; | ||
| 2611 | case TCP_THIN_DUPACK: | ||
| 2612 | val = tp->thin_dupack; | ||
| 2613 | break; | ||
| 2554 | default: | 2614 | default: |
| 2555 | return -ENOPROTOOPT; | 2615 | return -ENOPROTOOPT; |
| 2556 | } | 2616 | } |
| @@ -2572,6 +2632,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 2572 | optval, optlen); | 2632 | optval, optlen); |
| 2573 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); | 2633 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); |
| 2574 | } | 2634 | } |
| 2635 | EXPORT_SYMBOL(tcp_getsockopt); | ||
| 2575 | 2636 | ||
| 2576 | #ifdef CONFIG_COMPAT | 2637 | #ifdef CONFIG_COMPAT |
| 2577 | int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | 2638 | int compat_tcp_getsockopt(struct sock *sk, int level, int optname, |
| @@ -2582,7 +2643,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
| 2582 | optval, optlen); | 2643 | optval, optlen); |
| 2583 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); | 2644 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); |
| 2584 | } | 2645 | } |
| 2585 | |||
| 2586 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2646 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
| 2587 | #endif | 2647 | #endif |
| 2588 | 2648 | ||
| @@ -2682,7 +2742,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 2682 | struct tcphdr *th2; | 2742 | struct tcphdr *th2; |
| 2683 | unsigned int len; | 2743 | unsigned int len; |
| 2684 | unsigned int thlen; | 2744 | unsigned int thlen; |
| 2685 | unsigned int flags; | 2745 | __be32 flags; |
| 2686 | unsigned int mss = 1; | 2746 | unsigned int mss = 1; |
| 2687 | unsigned int hlen; | 2747 | unsigned int hlen; |
| 2688 | unsigned int off; | 2748 | unsigned int off; |
| @@ -2732,10 +2792,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 2732 | 2792 | ||
| 2733 | found: | 2793 | found: |
| 2734 | flush = NAPI_GRO_CB(p)->flush; | 2794 | flush = NAPI_GRO_CB(p)->flush; |
| 2735 | flush |= flags & TCP_FLAG_CWR; | 2795 | flush |= (__force int)(flags & TCP_FLAG_CWR); |
| 2736 | flush |= (flags ^ tcp_flag_word(th2)) & | 2796 | flush |= (__force int)((flags ^ tcp_flag_word(th2)) & |
| 2737 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); | 2797 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); |
| 2738 | flush |= th->ack_seq ^ th2->ack_seq; | 2798 | flush |= (__force int)(th->ack_seq ^ th2->ack_seq); |
| 2739 | for (i = sizeof(*th); i < thlen; i += 4) | 2799 | for (i = sizeof(*th); i < thlen; i += 4) |
| 2740 | flush |= *(u32 *)((u8 *)th + i) ^ | 2800 | flush |= *(u32 *)((u8 *)th + i) ^ |
| 2741 | *(u32 *)((u8 *)th2 + i); | 2801 | *(u32 *)((u8 *)th2 + i); |
| @@ -2756,8 +2816,9 @@ found: | |||
| 2756 | 2816 | ||
| 2757 | out_check_final: | 2817 | out_check_final: |
| 2758 | flush = len < mss; | 2818 | flush = len < mss; |
| 2759 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | | 2819 | flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | |
| 2760 | TCP_FLAG_SYN | TCP_FLAG_FIN); | 2820 | TCP_FLAG_RST | TCP_FLAG_SYN | |
| 2821 | TCP_FLAG_FIN)); | ||
| 2761 | 2822 | ||
| 2762 | if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) | 2823 | if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) |
| 2763 | pp = head; | 2824 | pp = head; |
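The __force casts in these two hunks are for sparse, which treats __be32 as a restricted bitwise type: folding it into a plain int accumulator without an annotation draws a "restricted __be32 degrades to integer" warning. The generated code is unchanged; the cast only records that mixing the types here is deliberate. In miniature:

	__be32 flags = tcp_flag_word(th);
	int flush = 0;

	/* flush |= flags & TCP_FLAG_CWR;                 sparse warns here */
	flush |= (__force int)(flags & TCP_FLAG_CWR);  /* annotated: ok */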
| @@ -2788,10 +2849,10 @@ EXPORT_SYMBOL(tcp_gro_complete); | |||
| 2788 | 2849 | ||
| 2789 | #ifdef CONFIG_TCP_MD5SIG | 2850 | #ifdef CONFIG_TCP_MD5SIG |
| 2790 | static unsigned long tcp_md5sig_users; | 2851 | static unsigned long tcp_md5sig_users; |
| 2791 | static struct tcp_md5sig_pool **tcp_md5sig_pool; | 2852 | static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; |
| 2792 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); | 2853 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); |
| 2793 | 2854 | ||
| 2794 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | 2855 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) |
| 2795 | { | 2856 | { |
| 2796 | int cpu; | 2857 | int cpu; |
| 2797 | for_each_possible_cpu(cpu) { | 2858 | for_each_possible_cpu(cpu) { |
| @@ -2800,7 +2861,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | |||
| 2800 | if (p->md5_desc.tfm) | 2861 | if (p->md5_desc.tfm) |
| 2801 | crypto_free_hash(p->md5_desc.tfm); | 2862 | crypto_free_hash(p->md5_desc.tfm); |
| 2802 | kfree(p); | 2863 | kfree(p); |
| 2803 | p = NULL; | ||
| 2804 | } | 2864 | } |
| 2805 | } | 2865 | } |
| 2806 | free_percpu(pool); | 2866 | free_percpu(pool); |
| @@ -2808,7 +2868,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | |||
| 2808 | 2868 | ||
| 2809 | void tcp_free_md5sig_pool(void) | 2869 | void tcp_free_md5sig_pool(void) |
| 2810 | { | 2870 | { |
| 2811 | struct tcp_md5sig_pool **pool = NULL; | 2871 | struct tcp_md5sig_pool * __percpu *pool = NULL; |
| 2812 | 2872 | ||
| 2813 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2873 | spin_lock_bh(&tcp_md5sig_pool_lock); |
| 2814 | if (--tcp_md5sig_users == 0) { | 2874 | if (--tcp_md5sig_users == 0) { |
| @@ -2819,13 +2879,13 @@ void tcp_free_md5sig_pool(void) | |||
| 2819 | if (pool) | 2879 | if (pool) |
| 2820 | __tcp_free_md5sig_pool(pool); | 2880 | __tcp_free_md5sig_pool(pool); |
| 2821 | } | 2881 | } |
| 2822 | |||
| 2823 | EXPORT_SYMBOL(tcp_free_md5sig_pool); | 2882 | EXPORT_SYMBOL(tcp_free_md5sig_pool); |
| 2824 | 2883 | ||
| 2825 | static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) | 2884 | static struct tcp_md5sig_pool * __percpu * |
| 2885 | __tcp_alloc_md5sig_pool(struct sock *sk) | ||
| 2826 | { | 2886 | { |
| 2827 | int cpu; | 2887 | int cpu; |
| 2828 | struct tcp_md5sig_pool **pool; | 2888 | struct tcp_md5sig_pool * __percpu *pool; |
| 2829 | 2889 | ||
| 2830 | pool = alloc_percpu(struct tcp_md5sig_pool *); | 2890 | pool = alloc_percpu(struct tcp_md5sig_pool *); |
| 2831 | if (!pool) | 2891 | if (!pool) |
| @@ -2852,9 +2912,9 @@ out_free: | |||
| 2852 | return NULL; | 2912 | return NULL; |
| 2853 | } | 2913 | } |
| 2854 | 2914 | ||
| 2855 | struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) | 2915 | struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) |
| 2856 | { | 2916 | { |
| 2857 | struct tcp_md5sig_pool **pool; | 2917 | struct tcp_md5sig_pool * __percpu *pool; |
| 2858 | int alloc = 0; | 2918 | int alloc = 0; |
| 2859 | 2919 | ||
| 2860 | retry: | 2920 | retry: |
| @@ -2873,7 +2933,9 @@ retry: | |||
| 2873 | 2933 | ||
| 2874 | if (alloc) { | 2934 | if (alloc) { |
| 2875 | /* we cannot hold spinlock here because this may sleep. */ | 2935 | /* we cannot hold spinlock here because this may sleep. */ |
| 2876 | struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); | 2936 | struct tcp_md5sig_pool * __percpu *p; |
| 2937 | |||
| 2938 | p = __tcp_alloc_md5sig_pool(sk); | ||
| 2877 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2939 | spin_lock_bh(&tcp_md5sig_pool_lock); |
| 2878 | if (!p) { | 2940 | if (!p) { |
| 2879 | tcp_md5sig_users--; | 2941 | tcp_md5sig_users--; |
| @@ -2892,28 +2954,42 @@ retry: | |||
| 2892 | } | 2954 | } |
| 2893 | return pool; | 2955 | return pool; |
| 2894 | } | 2956 | } |
| 2895 | |||
| 2896 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); | 2957 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); |
| 2897 | 2958 | ||
| 2898 | struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) | 2959 | |
| 2960 | /** | ||
| 2961 | * tcp_get_md5sig_pool - get md5sig_pool for this user | ||
| 2962 | * | ||
| 2963 | * We use a percpu structure, so on success we return with preemption | ||
| 2964 | * and BH disabled, to make sure another thread or softirq handler | ||
| 2965 | * won't try to get the same context. | ||
| 2966 | */ | ||
| 2967 | struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | ||
| 2899 | { | 2968 | { |
| 2900 | struct tcp_md5sig_pool **p; | 2969 | struct tcp_md5sig_pool * __percpu *p; |
| 2901 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2970 | |
| 2971 | local_bh_disable(); | ||
| 2972 | |||
| 2973 | spin_lock(&tcp_md5sig_pool_lock); | ||
| 2902 | p = tcp_md5sig_pool; | 2974 | p = tcp_md5sig_pool; |
| 2903 | if (p) | 2975 | if (p) |
| 2904 | tcp_md5sig_users++; | 2976 | tcp_md5sig_users++; |
| 2905 | spin_unlock_bh(&tcp_md5sig_pool_lock); | 2977 | spin_unlock(&tcp_md5sig_pool_lock); |
| 2906 | return (p ? *per_cpu_ptr(p, cpu) : NULL); | 2978 | |
| 2907 | } | 2979 | if (p) |
| 2980 | return *this_cpu_ptr(p); | ||
| 2908 | 2981 | ||
| 2909 | EXPORT_SYMBOL(__tcp_get_md5sig_pool); | 2982 | local_bh_enable(); |
| 2983 | return NULL; | ||
| 2984 | } | ||
| 2985 | EXPORT_SYMBOL(tcp_get_md5sig_pool); | ||
| 2910 | 2986 | ||
| 2911 | void __tcp_put_md5sig_pool(void) | 2987 | void tcp_put_md5sig_pool(void) |
| 2912 | { | 2988 | { |
| 2989 | local_bh_enable(); | ||
| 2913 | tcp_free_md5sig_pool(); | 2990 | tcp_free_md5sig_pool(); |
| 2914 | } | 2991 | } |
| 2915 | 2992 | EXPORT_SYMBOL(tcp_put_md5sig_pool); | |
| 2916 | EXPORT_SYMBOL(__tcp_put_md5sig_pool); | ||
| 2917 | 2993 | ||
| 2918 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | 2994 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, |
| 2919 | struct tcphdr *th) | 2995 | struct tcphdr *th) |
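Under the reworked API a caller no longer picks a CPU itself: tcp_get_md5sig_pool() disables BH, takes a user reference and hands back this CPU's context, and tcp_put_md5sig_pool() undoes both. A sketch of the calling pattern (the hashing in the middle stands for the tcp_md5_hash_* helpers below):

	struct tcp_md5sig_pool *hp;

	hp = tcp_get_md5sig_pool();	/* returns with BH disabled on success */
	if (hp) {
		/* ... tcp_md5_hash_header()/_skb_data()/_key() on hp ... */
		tcp_put_md5sig_pool();	/* re-enables BH, drops the user ref */
	}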
| @@ -2929,7 +3005,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | |||
| 2929 | th->check = old_checksum; | 3005 | th->check = old_checksum; |
| 2930 | return err; | 3006 | return err; |
| 2931 | } | 3007 | } |
| 2932 | |||
| 2933 | EXPORT_SYMBOL(tcp_md5_hash_header); | 3008 | EXPORT_SYMBOL(tcp_md5_hash_header); |
| 2934 | 3009 | ||
| 2935 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | 3010 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, |
| @@ -2942,6 +3017,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
| 2942 | const unsigned head_data_len = skb_headlen(skb) > header_len ? | 3017 | const unsigned head_data_len = skb_headlen(skb) > header_len ? |
| 2943 | skb_headlen(skb) - header_len : 0; | 3018 | skb_headlen(skb) - header_len : 0; |
| 2944 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3019 | const struct skb_shared_info *shi = skb_shinfo(skb); |
| 3020 | struct sk_buff *frag_iter; | ||
| 2945 | 3021 | ||
| 2946 | sg_init_table(&sg, 1); | 3022 | sg_init_table(&sg, 1); |
| 2947 | 3023 | ||
| @@ -2956,9 +3032,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
| 2956 | return 1; | 3032 | return 1; |
| 2957 | } | 3033 | } |
| 2958 | 3034 | ||
| 3035 | skb_walk_frags(skb, frag_iter) | ||
| 3036 | if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) | ||
| 3037 | return 1; | ||
| 3038 | |||
| 2959 | return 0; | 3039 | return 0; |
| 2960 | } | 3040 | } |
| 2961 | |||
| 2962 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); | 3041 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); |
| 2963 | 3042 | ||
| 2964 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) | 3043 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) |
| @@ -2968,7 +3047,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) | |||
| 2968 | sg_init_one(&sg, key->key, key->keylen); | 3047 | sg_init_one(&sg, key->key, key->keylen); |
| 2969 | return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); | 3048 | return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); |
| 2970 | } | 3049 | } |
| 2971 | |||
| 2972 | EXPORT_SYMBOL(tcp_md5_hash_key); | 3050 | EXPORT_SYMBOL(tcp_md5_hash_key); |
| 2973 | 3051 | ||
| 2974 | #endif | 3052 | #endif |
| @@ -3135,7 +3213,7 @@ void __init tcp_init(void) | |||
| 3135 | { | 3213 | { |
| 3136 | struct sk_buff *skb = NULL; | 3214 | struct sk_buff *skb = NULL; |
| 3137 | unsigned long nr_pages, limit; | 3215 | unsigned long nr_pages, limit; |
| 3138 | int order, i, max_share; | 3216 | int i, max_share, cnt; |
| 3139 | unsigned long jiffy = jiffies; | 3217 | unsigned long jiffy = jiffies; |
| 3140 | 3218 | ||
| 3141 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3219 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
| @@ -3184,22 +3262,12 @@ void __init tcp_init(void) | |||
| 3184 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); | 3262 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); |
| 3185 | } | 3263 | } |
| 3186 | 3264 | ||
| 3187 | /* Try to be a bit smarter and adjust defaults depending | 3265 | |
| 3188 | * on available memory. | 3266 | cnt = tcp_hashinfo.ehash_mask + 1; |
| 3189 | */ | 3267 | |
| 3190 | for (order = 0; ((1 << order) << PAGE_SHIFT) < | 3268 | tcp_death_row.sysctl_max_tw_buckets = cnt / 2; |
| 3191 | (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); | 3269 | sysctl_tcp_max_orphans = cnt / 2; |
| 3192 | order++) | 3270 | sysctl_max_syn_backlog = max(128, cnt / 256); |
| 3193 | ; | ||
| 3194 | if (order >= 4) { | ||
| 3195 | tcp_death_row.sysctl_max_tw_buckets = 180000; | ||
| 3196 | sysctl_tcp_max_orphans = 4096 << (order - 4); | ||
| 3197 | sysctl_max_syn_backlog = 1024; | ||
| 3198 | } else if (order < 3) { | ||
| 3199 | tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); | ||
| 3200 | sysctl_tcp_max_orphans >>= (3 - order); | ||
| 3201 | sysctl_max_syn_backlog = 128; | ||
| 3202 | } | ||
| 3203 | 3271 | ||
| 3204 | /* Set the pressure threshold to be a fraction of global memory that | 3272 | /* Set the pressure threshold to be a fraction of global memory that |
| 3205 | * is up to 1/2 at 256 MB, decreasing toward zero with the amount of | 3273 | * is up to 1/2 at 256 MB, decreasing toward zero with the amount of |
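With the old order-based heuristic gone, the three limits above scale linearly with the established-hash size. As a worked example (illustrative numbers, not from this patch): on a box whose ehash comes out at 512K buckets, cnt = 524288, so tcp_max_tw_buckets and tcp_max_orphans both become 262144, and tcp_max_syn_backlog becomes max(128, 524288 / 256) = 2048; on a small machine with a 2048-bucket ehash the same formulas give 1024, 1024 and 128.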
| @@ -3240,16 +3308,3 @@ void __init tcp_init(void) | |||
| 3240 | tcp_secret_retiring = &tcp_secret_two; | 3308 | tcp_secret_retiring = &tcp_secret_two; |
| 3241 | tcp_secret_secondary = &tcp_secret_two; | 3309 | tcp_secret_secondary = &tcp_secret_two; |
| 3242 | } | 3310 | } |
| 3243 | |||
| 3244 | EXPORT_SYMBOL(tcp_close); | ||
| 3245 | EXPORT_SYMBOL(tcp_disconnect); | ||
| 3246 | EXPORT_SYMBOL(tcp_getsockopt); | ||
| 3247 | EXPORT_SYMBOL(tcp_ioctl); | ||
| 3248 | EXPORT_SYMBOL(tcp_poll); | ||
| 3249 | EXPORT_SYMBOL(tcp_read_sock); | ||
| 3250 | EXPORT_SYMBOL(tcp_recvmsg); | ||
| 3251 | EXPORT_SYMBOL(tcp_sendmsg); | ||
| 3252 | EXPORT_SYMBOL(tcp_splice_read); | ||
| 3253 | EXPORT_SYMBOL(tcp_sendpage); | ||
| 3254 | EXPORT_SYMBOL(tcp_setsockopt); | ||
| 3255 | EXPORT_SYMBOL(tcp_shutdown); | ||
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6428b342b164..850c737e08e2 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
| 11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
| 12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
| 13 | #include <linux/gfp.h> | ||
| 13 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
| 14 | 15 | ||
| 15 | int sysctl_tcp_max_ssthresh = 0; | 16 | int sysctl_tcp_max_ssthresh = 0; |
| @@ -195,10 +196,10 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) | |||
| 195 | int tcp_set_allowed_congestion_control(char *val) | 196 | int tcp_set_allowed_congestion_control(char *val) |
| 196 | { | 197 | { |
| 197 | struct tcp_congestion_ops *ca; | 198 | struct tcp_congestion_ops *ca; |
| 198 | char *clone, *name; | 199 | char *saved_clone, *clone, *name; |
| 199 | int ret = 0; | 200 | int ret = 0; |
| 200 | 201 | ||
| 201 | clone = kstrdup(val, GFP_USER); | 202 | saved_clone = clone = kstrdup(val, GFP_USER); |
| 202 | if (!clone) | 203 | if (!clone) |
| 203 | return -ENOMEM; | 204 | return -ENOMEM; |
| 204 | 205 | ||
| @@ -225,6 +226,7 @@ int tcp_set_allowed_congestion_control(char *val) | |||
| 225 | } | 226 | } |
| 226 | out: | 227 | out: |
| 227 | spin_unlock(&tcp_cong_list_lock); | 228 | spin_unlock(&tcp_cong_list_lock); |
| 229 | kfree(saved_clone); | ||
| 228 | 230 | ||
| 229 | return ret; | 231 | return ret; |
| 230 | } | 232 | } |
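The leak fixed above exists because strsep() advances the pointer it is handed, so after the parsing loop, clone no longer points at the start of the kstrdup() buffer. A small userspace program showing the same pitfall (hypothetical token list, glibc strsep()):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char *saved, *clone, *name;

            saved = clone = strdup("reno cubic vegas");
            if (!saved)
                    return 1;

            while ((name = strsep(&clone, " ")) != NULL)
                    printf("token: %s\n", name);    /* strsep() moves clone forward */

            /* clone is NULL once the string is exhausted, so freeing it
             * would silently leak the allocation; only saved still points
             * at the start of the strdup() buffer.
             */
            free(saved);
            return 0;
    }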
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e054a634..377bc9349371 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
| @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
| 126 | * calculate 2^fract in a <<7 value. | 126 | * calculate 2^fract in a <<7 value. |
| 127 | */ | 127 | */ |
| 128 | is_slowstart = 1; | 128 | is_slowstart = 1; |
| 129 | increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) | 129 | increment = ((1 << min(ca->rho, 16U)) * |
| 130 | - 128; | 130 | hybla_fraction(rho_fractions)) - 128; |
| 131 | } else { | 131 | } else { |
| 132 | /* | 132 | /* |
| 133 | * congestion avoidance | 133 | * congestion avoidance |
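The min(ca->rho, 16U) clamp above guards the 32-bit shift: hybla_fraction() yields a value scaled by 128 (so at most a few hundred), and with rho capped at 16 the slow-start increment stays around (1 << 16) * 256 = 2^24, comfortably inside a u32, whereas an unclamped rho measured over a very long-RTT path could push the shift toward (or past) 32 bits and overflow. The cap of 16 is what the patch chooses; the surrounding arithmetic is a reading of the shown code, not additional patch context.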
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e029632493..b55f60f6fcbe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -62,6 +62,7 @@ | |||
| 62 | */ | 62 | */ |
| 63 | 63 | ||
| 64 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
| 65 | #include <linux/slab.h> | ||
| 65 | #include <linux/module.h> | 66 | #include <linux/module.h> |
| 66 | #include <linux/sysctl.h> | 67 | #include <linux/sysctl.h> |
| 67 | #include <linux/kernel.h> | 68 | #include <linux/kernel.h> |
| @@ -77,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1; | |||
| 77 | int sysctl_tcp_sack __read_mostly = 1; | 78 | int sysctl_tcp_sack __read_mostly = 1; |
| 78 | int sysctl_tcp_fack __read_mostly = 1; | 79 | int sysctl_tcp_fack __read_mostly = 1; |
| 79 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; | 80 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; |
| 81 | EXPORT_SYMBOL(sysctl_tcp_reordering); | ||
| 80 | int sysctl_tcp_ecn __read_mostly = 2; | 82 | int sysctl_tcp_ecn __read_mostly = 2; |
| 83 | EXPORT_SYMBOL(sysctl_tcp_ecn); | ||
| 81 | int sysctl_tcp_dsack __read_mostly = 1; | 84 | int sysctl_tcp_dsack __read_mostly = 1; |
| 82 | int sysctl_tcp_app_win __read_mostly = 31; | 85 | int sysctl_tcp_app_win __read_mostly = 31; |
| 83 | int sysctl_tcp_adv_win_scale __read_mostly = 2; | 86 | int sysctl_tcp_adv_win_scale __read_mostly = 2; |
| 87 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
| 84 | 88 | ||
| 85 | int sysctl_tcp_stdurg __read_mostly; | 89 | int sysctl_tcp_stdurg __read_mostly; |
| 86 | int sysctl_tcp_rfc1337 __read_mostly; | 90 | int sysctl_tcp_rfc1337 __read_mostly; |
| @@ -89,6 +93,8 @@ int sysctl_tcp_frto __read_mostly = 2; | |||
| 89 | int sysctl_tcp_frto_response __read_mostly; | 93 | int sysctl_tcp_frto_response __read_mostly; |
| 90 | int sysctl_tcp_nometrics_save __read_mostly; | 94 | int sysctl_tcp_nometrics_save __read_mostly; |
| 91 | 95 | ||
| 96 | int sysctl_tcp_thin_dupack __read_mostly; | ||
| 97 | |||
| 92 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 98 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
| 93 | int sysctl_tcp_abc __read_mostly; | 99 | int sysctl_tcp_abc __read_mostly; |
| 94 | 100 | ||
| @@ -416,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) | |||
| 416 | 422 | ||
| 417 | inet_csk(sk)->icsk_ack.rcv_mss = hint; | 423 | inet_csk(sk)->icsk_ack.rcv_mss = hint; |
| 418 | } | 424 | } |
| 425 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
| 419 | 426 | ||
| 420 | /* Receiver "autotuning" code. | 427 | /* Receiver "autotuning" code. |
| 421 | * | 428 | * |
| @@ -2447,6 +2454,16 @@ static int tcp_time_to_recover(struct sock *sk) | |||
| 2447 | return 1; | 2454 | return 1; |
| 2448 | } | 2455 | } |
| 2449 | 2456 | ||
| 2457 | /* If a thin stream is detected, retransmit after the first | ||
| 2458 | * received dupack. Employ only if SACK is supported, in order | ||
| 2459 | * to avoid a possible corner-case series of spurious retransmissions. | ||
| 2460 | * Use only if there is no unsent data. | ||
| 2461 | */ | ||
| 2462 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | ||
| 2463 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | ||
| 2464 | tcp_is_sack(tp) && !tcp_send_head(sk)) | ||
| 2465 | return 1; | ||
| 2466 | |||
| 2450 | return 0; | 2467 | return 0; |
| 2451 | } | 2468 | } |
| 2452 | 2469 | ||
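The new early-retransmit branch above keys off tcp_stream_is_thin(), which this hunk does not show. In the thin-stream series it is a small predicate along these lines (a sketch of the companion header change, so treat the exact form as an assumption):

    /* A stream is "thin" when it keeps fewer packets in flight than the
     * classic three-dupack fast-retransmit threshold can ever observe,
     * and it is no longer in initial slow start (so the low count is not
     * just connection ramp-up).
     */
    static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
    {
            return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
    }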
| @@ -2499,6 +2516,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
| 2499 | int err; | 2516 | int err; |
| 2500 | unsigned int mss; | 2517 | unsigned int mss; |
| 2501 | 2518 | ||
| 2519 | if (packets == 0) | ||
| 2520 | return; | ||
| 2521 | |||
| 2502 | WARN_ON(packets > tp->packets_out); | 2522 | WARN_ON(packets > tp->packets_out); |
| 2503 | if (tp->lost_skb_hint) { | 2523 | if (tp->lost_skb_hint) { |
| 2504 | skb = tp->lost_skb_hint; | 2524 | skb = tp->lost_skb_hint; |
| @@ -2525,7 +2545,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
| 2525 | cnt += tcp_skb_pcount(skb); | 2545 | cnt += tcp_skb_pcount(skb); |
| 2526 | 2546 | ||
| 2527 | if (cnt > packets) { | 2547 | if (cnt > packets) { |
| 2528 | if (tcp_is_sack(tp) || (oldcnt >= packets)) | 2548 | if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || |
| 2549 | (oldcnt >= packets)) | ||
| 2529 | break; | 2550 | break; |
| 2530 | 2551 | ||
| 2531 | mss = skb_shinfo(skb)->gso_size; | 2552 | mss = skb_shinfo(skb)->gso_size; |
| @@ -2623,7 +2644,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
| 2623 | if (sk->sk_family == AF_INET) { | 2644 | if (sk->sk_family == AF_INET) { |
| 2624 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", | 2645 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
| 2625 | msg, | 2646 | msg, |
| 2626 | &inet->daddr, ntohs(inet->dport), | 2647 | &inet->inet_daddr, ntohs(inet->inet_dport), |
| 2627 | tp->snd_cwnd, tcp_left_out(tp), | 2648 | tp->snd_cwnd, tcp_left_out(tp), |
| 2628 | tp->snd_ssthresh, tp->prior_ssthresh, | 2649 | tp->snd_ssthresh, tp->prior_ssthresh, |
| 2629 | tp->packets_out); | 2650 | tp->packets_out); |
| @@ -2633,7 +2654,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
| 2633 | struct ipv6_pinfo *np = inet6_sk(sk); | 2654 | struct ipv6_pinfo *np = inet6_sk(sk); |
| 2634 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2655 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
| 2635 | msg, | 2656 | msg, |
| 2636 | &np->daddr, ntohs(inet->dport), | 2657 | &np->daddr, ntohs(inet->inet_dport), |
| 2637 | tp->snd_cwnd, tcp_left_out(tp), | 2658 | tp->snd_cwnd, tcp_left_out(tp), |
| 2638 | tp->snd_ssthresh, tp->prior_ssthresh, | 2659 | tp->snd_ssthresh, tp->prior_ssthresh, |
| 2639 | tp->packets_out); | 2660 | tp->packets_out); |
| @@ -2922,6 +2943,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
| 2922 | } | 2943 | } |
| 2923 | tcp_xmit_retransmit_queue(sk); | 2944 | tcp_xmit_retransmit_queue(sk); |
| 2924 | } | 2945 | } |
| 2946 | EXPORT_SYMBOL(tcp_simple_retransmit); | ||
| 2925 | 2947 | ||
| 2926 | /* Process an event, which can update packets-in-flight not trivially. | 2948 | /* Process an event, which can update packets-in-flight not trivially. |
| 2927 | * Main goal of this function is to calculate new estimate for left_out, | 2949 | * Main goal of this function is to calculate new estimate for left_out, |
| @@ -3270,7 +3292,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
| 3270 | * connection startup slow start one packet too | 3292 | * connection startup slow start one packet too |
| 3271 | * quickly. This is severely frowned upon behavior. | 3293 | * quickly. This is severely frowned upon behavior. |
| 3272 | */ | 3294 | */ |
| 3273 | if (!(scb->flags & TCPCB_FLAG_SYN)) { | 3295 | if (!(scb->flags & TCPHDR_SYN)) { |
| 3274 | flag |= FLAG_DATA_ACKED; | 3296 | flag |= FLAG_DATA_ACKED; |
| 3275 | } else { | 3297 | } else { |
| 3276 | flag |= FLAG_SYN_ACKED; | 3298 | flag |= FLAG_SYN_ACKED; |
| @@ -3694,7 +3716,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
| 3694 | } | 3716 | } |
| 3695 | 3717 | ||
| 3696 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) | 3718 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) |
| 3697 | dst_confirm(sk->sk_dst_cache); | 3719 | dst_confirm(__sk_dst_get(sk)); |
| 3698 | 3720 | ||
| 3699 | return 1; | 3721 | return 1; |
| 3700 | 3722 | ||
| @@ -3829,18 +3851,20 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
| 3829 | /* 16-bit multiple */ | 3851 | /* 16-bit multiple */ |
| 3830 | opt_rx->cookie_plus = opsize; | 3852 | opt_rx->cookie_plus = opsize; |
| 3831 | *hvpp = ptr; | 3853 | *hvpp = ptr; |
| 3854 | break; | ||
| 3832 | default: | 3855 | default: |
| 3833 | /* ignore option */ | 3856 | /* ignore option */ |
| 3834 | break; | 3857 | break; |
| 3835 | }; | 3858 | } |
| 3836 | break; | 3859 | break; |
| 3837 | }; | 3860 | } |
| 3838 | 3861 | ||
| 3839 | ptr += opsize-2; | 3862 | ptr += opsize-2; |
| 3840 | length -= opsize; | 3863 | length -= opsize; |
| 3841 | } | 3864 | } |
| 3842 | } | 3865 | } |
| 3843 | } | 3866 | } |
| 3867 | EXPORT_SYMBOL(tcp_parse_options); | ||
| 3844 | 3868 | ||
| 3845 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | 3869 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) |
| 3846 | { | 3870 | { |
| @@ -3907,13 +3931,14 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th) | |||
| 3907 | if (opsize < 2 || opsize > length) | 3931 | if (opsize < 2 || opsize > length) |
| 3908 | return NULL; | 3932 | return NULL; |
| 3909 | if (opcode == TCPOPT_MD5SIG) | 3933 | if (opcode == TCPOPT_MD5SIG) |
| 3910 | return ptr; | 3934 | return opsize == TCPOLEN_MD5SIG ? ptr : NULL; |
| 3911 | } | 3935 | } |
| 3912 | ptr += opsize - 2; | 3936 | ptr += opsize - 2; |
| 3913 | length -= opsize; | 3937 | length -= opsize; |
| 3914 | } | 3938 | } |
| 3915 | return NULL; | 3939 | return NULL; |
| 3916 | } | 3940 | } |
| 3941 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | ||
| 3917 | #endif | 3942 | #endif |
| 3918 | 3943 | ||
| 3919 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) | 3944 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) |
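The stricter check above only returns the option pointer when opsize equals TCPOLEN_MD5SIG, i.e. 18 bytes: one byte of kind, one of length, and the 16-byte MD5 digest. Previously any TCPOPT_MD5SIG kind byte was accepted regardless of its advertised length, so a truncated option could later be read as if a full digest followed it; requiring the exact length closes that off.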
| @@ -4024,6 +4049,8 @@ static void tcp_reset(struct sock *sk) | |||
| 4024 | default: | 4049 | default: |
| 4025 | sk->sk_err = ECONNRESET; | 4050 | sk->sk_err = ECONNRESET; |
| 4026 | } | 4051 | } |
| 4052 | /* This barrier is coupled with smp_rmb() in tcp_poll() */ | ||
| 4053 | smp_wmb(); | ||
| 4027 | 4054 | ||
| 4028 | if (!sock_flag(sk, SOCK_DEAD)) | 4055 | if (!sock_flag(sk, SOCK_DEAD)) |
| 4029 | sk->sk_error_report(sk); | 4056 | sk->sk_error_report(sk); |
| @@ -4303,7 +4330,7 @@ static void tcp_ofo_queue(struct sock *sk) | |||
| 4303 | } | 4330 | } |
| 4304 | 4331 | ||
| 4305 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { | 4332 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { |
| 4306 | SOCK_DEBUG(sk, "ofo packet was already received \n"); | 4333 | SOCK_DEBUG(sk, "ofo packet was already received\n"); |
| 4307 | __skb_unlink(skb, &tp->out_of_order_queue); | 4334 | __skb_unlink(skb, &tp->out_of_order_queue); |
| 4308 | __kfree_skb(skb); | 4335 | __kfree_skb(skb); |
| 4309 | continue; | 4336 | continue; |
| @@ -4351,6 +4378,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
| 4351 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4378 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
| 4352 | goto drop; | 4379 | goto drop; |
| 4353 | 4380 | ||
| 4381 | skb_dst_drop(skb); | ||
| 4354 | __skb_pull(skb, th->doff * 4); | 4382 | __skb_pull(skb, th->doff * 4); |
| 4355 | 4383 | ||
| 4356 | TCP_ECN_accept_cwr(tp, skb); | 4384 | TCP_ECN_accept_cwr(tp, skb); |
| @@ -5414,6 +5442,7 @@ discard: | |||
| 5414 | __kfree_skb(skb); | 5442 | __kfree_skb(skb); |
| 5415 | return 0; | 5443 | return 0; |
| 5416 | } | 5444 | } |
| 5445 | EXPORT_SYMBOL(tcp_rcv_established); | ||
| 5417 | 5446 | ||
| 5418 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5447 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
| 5419 | struct tcphdr *th, unsigned len) | 5448 | struct tcphdr *th, unsigned len) |
| @@ -5783,11 +5812,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 5783 | 5812 | ||
| 5784 | /* tcp_ack considers this ACK as duplicate | 5813 | /* tcp_ack considers this ACK as duplicate |
| 5785 | * and does not calculate rtt. | 5814 | * and does not calculate rtt. |
| 5786 | * Fix it at least with timestamps. | 5815 | * Force it here. |
| 5787 | */ | 5816 | */ |
| 5788 | if (tp->rx_opt.saw_tstamp && | 5817 | tcp_ack_update_rtt(sk, 0, 0); |
| 5789 | tp->rx_opt.rcv_tsecr && !tp->srtt) | ||
| 5790 | tcp_ack_saw_tstamp(sk, 0); | ||
| 5791 | 5818 | ||
| 5792 | if (tp->rx_opt.tstamp_ok) | 5819 | if (tp->rx_opt.tstamp_ok) |
| 5793 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5820 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
| @@ -5819,7 +5846,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 5819 | if (tp->snd_una == tp->write_seq) { | 5846 | if (tp->snd_una == tp->write_seq) { |
| 5820 | tcp_set_state(sk, TCP_FIN_WAIT2); | 5847 | tcp_set_state(sk, TCP_FIN_WAIT2); |
| 5821 | sk->sk_shutdown |= SEND_SHUTDOWN; | 5848 | sk->sk_shutdown |= SEND_SHUTDOWN; |
| 5822 | dst_confirm(sk->sk_dst_cache); | 5849 | dst_confirm(__sk_dst_get(sk)); |
| 5823 | 5850 | ||
| 5824 | if (!sock_flag(sk, SOCK_DEAD)) | 5851 | if (!sock_flag(sk, SOCK_DEAD)) |
| 5825 | /* Wake up lingering close() */ | 5852 | /* Wake up lingering close() */ |
| @@ -5915,14 +5942,4 @@ discard: | |||
| 5915 | } | 5942 | } |
| 5916 | return 0; | 5943 | return 0; |
| 5917 | } | 5944 | } |
| 5918 | |||
| 5919 | EXPORT_SYMBOL(sysctl_tcp_ecn); | ||
| 5920 | EXPORT_SYMBOL(sysctl_tcp_reordering); | ||
| 5921 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
| 5922 | EXPORT_SYMBOL(tcp_parse_options); | ||
| 5923 | #ifdef CONFIG_TCP_MD5SIG | ||
| 5924 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | ||
| 5925 | #endif | ||
| 5926 | EXPORT_SYMBOL(tcp_rcv_established); | ||
| 5927 | EXPORT_SYMBOL(tcp_rcv_state_process); | 5945 | EXPORT_SYMBOL(tcp_rcv_state_process); |
| 5928 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65b8ebfd078a..020766292bb0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -60,6 +60,7 @@ | |||
| 60 | #include <linux/jhash.h> | 60 | #include <linux/jhash.h> |
| 61 | #include <linux/init.h> | 61 | #include <linux/init.h> |
| 62 | #include <linux/times.h> | 62 | #include <linux/times.h> |
| 63 | #include <linux/slab.h> | ||
| 63 | 64 | ||
| 64 | #include <net/net_namespace.h> | 65 | #include <net/net_namespace.h> |
| 65 | #include <net/icmp.h> | 66 | #include <net/icmp.h> |
| @@ -83,6 +84,7 @@ | |||
| 83 | 84 | ||
| 84 | int sysctl_tcp_tw_reuse __read_mostly; | 85 | int sysctl_tcp_tw_reuse __read_mostly; |
| 85 | int sysctl_tcp_low_latency __read_mostly; | 86 | int sysctl_tcp_low_latency __read_mostly; |
| 87 | EXPORT_SYMBOL(sysctl_tcp_low_latency); | ||
| 86 | 88 | ||
| 87 | 89 | ||
| 88 | #ifdef CONFIG_TCP_MD5SIG | 90 | #ifdef CONFIG_TCP_MD5SIG |
| @@ -99,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) | |||
| 99 | #endif | 101 | #endif |
| 100 | 102 | ||
| 101 | struct inet_hashinfo tcp_hashinfo; | 103 | struct inet_hashinfo tcp_hashinfo; |
| 104 | EXPORT_SYMBOL(tcp_hashinfo); | ||
| 102 | 105 | ||
| 103 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) | 106 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) |
| 104 | { | 107 | { |
| @@ -138,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
| 138 | 141 | ||
| 139 | return 0; | 142 | return 0; |
| 140 | } | 143 | } |
| 141 | |||
| 142 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); | 144 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); |
| 143 | 145 | ||
| 144 | /* This will initiate an outgoing connection. */ | 146 | /* This will initiate an outgoing connection. */ |
| @@ -203,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 203 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, | 205 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, |
| 204 | * when trying new connection. | 206 | * when trying new connection. |
| 205 | */ | 207 | */ |
| 206 | if (peer != NULL && | 208 | if (peer) { |
| 207 | (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { | 209 | inet_peer_refcheck(peer); |
| 208 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | 210 | if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { |
| 209 | tp->rx_opt.ts_recent = peer->tcp_ts; | 211 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; |
| 212 | tp->rx_opt.ts_recent = peer->tcp_ts; | ||
| 213 | } | ||
| 210 | } | 214 | } |
| 211 | } | 215 | } |
| 212 | 216 | ||
| @@ -236,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 236 | 240 | ||
| 237 | /* OK, now commit destination to socket. */ | 241 | /* OK, now commit destination to socket. */ |
| 238 | sk->sk_gso_type = SKB_GSO_TCPV4; | 242 | sk->sk_gso_type = SKB_GSO_TCPV4; |
| 239 | sk_setup_caps(sk, &rt->u.dst); | 243 | sk_setup_caps(sk, &rt->dst); |
| 240 | 244 | ||
| 241 | if (!tp->write_seq) | 245 | if (!tp->write_seq) |
| 242 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, | 246 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
| @@ -264,6 +268,7 @@ failure: | |||
| 264 | inet->inet_dport = 0; | 268 | inet->inet_dport = 0; |
| 265 | return err; | 269 | return err; |
| 266 | } | 270 | } |
| 271 | EXPORT_SYMBOL(tcp_v4_connect); | ||
| 267 | 272 | ||
| 268 | /* | 273 | /* |
| 269 | * This routine does path mtu discovery as defined in RFC1191. | 274 | * This routine does path mtu discovery as defined in RFC1191. |
| @@ -370,6 +375,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 370 | if (sk->sk_state == TCP_CLOSE) | 375 | if (sk->sk_state == TCP_CLOSE) |
| 371 | goto out; | 376 | goto out; |
| 372 | 377 | ||
| 378 | if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { | ||
| 379 | NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); | ||
| 380 | goto out; | ||
| 381 | } | ||
| 382 | |||
| 373 | icsk = inet_csk(sk); | 383 | icsk = inet_csk(sk); |
| 374 | tp = tcp_sk(sk); | 384 | tp = tcp_sk(sk); |
| 375 | seq = ntohl(th->seq); | 385 | seq = ntohl(th->seq); |
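The min_ttl drop added above is the receive-side enforcement of the IP_MINTTL socket option (the generalized TTL security mechanism of RFC 5082): ICMP errors and segments whose TTL arrived below the socket's configured floor are counted under TCPMINTTLDROP and discarded before they can disturb connection state. From userspace the floor is set per socket, for example (a minimal sketch; IP_MINTTL may need <linux/in.h> on older libc headers):

    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <sys/socket.h>

    /* Accept only directly connected peers: a sender one hop away can
     * still deliver TTL 255, but anything that crossed a router cannot.
     */
    static int enable_gtsm(int fd)
    {
            int minttl = 255;

            return setsockopt(fd, IPPROTO_IP, IP_MINTTL,
                              &minttl, sizeof(minttl));
    }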
| @@ -513,26 +523,32 @@ out: | |||
| 513 | sock_put(sk); | 523 | sock_put(sk); |
| 514 | } | 524 | } |
| 515 | 525 | ||
| 516 | /* This routine computes an IPv4 TCP checksum. */ | 526 | static void __tcp_v4_send_check(struct sk_buff *skb, |
| 517 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | 527 | __be32 saddr, __be32 daddr) |
| 518 | { | 528 | { |
| 519 | struct inet_sock *inet = inet_sk(sk); | ||
| 520 | struct tcphdr *th = tcp_hdr(skb); | 529 | struct tcphdr *th = tcp_hdr(skb); |
| 521 | 530 | ||
| 522 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 531 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
| 523 | th->check = ~tcp_v4_check(len, inet->inet_saddr, | 532 | th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); |
| 524 | inet->inet_daddr, 0); | ||
| 525 | skb->csum_start = skb_transport_header(skb) - skb->head; | 533 | skb->csum_start = skb_transport_header(skb) - skb->head; |
| 526 | skb->csum_offset = offsetof(struct tcphdr, check); | 534 | skb->csum_offset = offsetof(struct tcphdr, check); |
| 527 | } else { | 535 | } else { |
| 528 | th->check = tcp_v4_check(len, inet->inet_saddr, | 536 | th->check = tcp_v4_check(skb->len, saddr, daddr, |
| 529 | inet->inet_daddr, | ||
| 530 | csum_partial(th, | 537 | csum_partial(th, |
| 531 | th->doff << 2, | 538 | th->doff << 2, |
| 532 | skb->csum)); | 539 | skb->csum)); |
| 533 | } | 540 | } |
| 534 | } | 541 | } |
| 535 | 542 | ||
| 543 | /* This routine computes an IPv4 TCP checksum. */ | ||
| 544 | void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) | ||
| 545 | { | ||
| 546 | struct inet_sock *inet = inet_sk(sk); | ||
| 547 | |||
| 548 | __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); | ||
| 549 | } | ||
| 550 | EXPORT_SYMBOL(tcp_v4_send_check); | ||
| 551 | |||
| 536 | int tcp_v4_gso_send_check(struct sk_buff *skb) | 552 | int tcp_v4_gso_send_check(struct sk_buff *skb) |
| 537 | { | 553 | { |
| 538 | const struct iphdr *iph; | 554 | const struct iphdr *iph; |
| @@ -545,10 +561,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) | |||
| 545 | th = tcp_hdr(skb); | 561 | th = tcp_hdr(skb); |
| 546 | 562 | ||
| 547 | th->check = 0; | 563 | th->check = 0; |
| 548 | th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); | ||
| 549 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
| 550 | skb->csum_offset = offsetof(struct tcphdr, check); | ||
| 551 | skb->ip_summed = CHECKSUM_PARTIAL; | 564 | skb->ip_summed = CHECKSUM_PARTIAL; |
| 565 | __tcp_v4_send_check(skb, iph->saddr, iph->daddr); | ||
| 552 | return 0; | 566 | return 0; |
| 553 | } | 567 | } |
| 554 | 568 | ||
| @@ -742,9 +756,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
| 742 | * This still operates on a request_sock only, not on a big | 756 | * This still operates on a request_sock only, not on a big |
| 743 | * socket. | 757 | * socket. |
| 744 | */ | 758 | */ |
| 745 | static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | 759 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, |
| 746 | struct request_sock *req, | 760 | struct request_sock *req, |
| 747 | struct request_values *rvp) | 761 | struct request_values *rvp) |
| 748 | { | 762 | { |
| 749 | const struct inet_request_sock *ireq = inet_rsk(req); | 763 | const struct inet_request_sock *ireq = inet_rsk(req); |
| 750 | int err = -1; | 764 | int err = -1; |
| @@ -757,13 +771,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
| 757 | skb = tcp_make_synack(sk, dst, req, rvp); | 771 | skb = tcp_make_synack(sk, dst, req, rvp); |
| 758 | 772 | ||
| 759 | if (skb) { | 773 | if (skb) { |
| 760 | struct tcphdr *th = tcp_hdr(skb); | 774 | __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); |
| 761 | |||
| 762 | th->check = tcp_v4_check(skb->len, | ||
| 763 | ireq->loc_addr, | ||
| 764 | ireq->rmt_addr, | ||
| 765 | csum_partial(th, skb->len, | ||
| 766 | skb->csum)); | ||
| 767 | 775 | ||
| 768 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, | 776 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, |
| 769 | ireq->rmt_addr, | 777 | ireq->rmt_addr, |
| @@ -775,10 +783,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
| 775 | return err; | 783 | return err; |
| 776 | } | 784 | } |
| 777 | 785 | ||
| 778 | static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | 786 | static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, |
| 779 | struct request_values *rvp) | 787 | struct request_values *rvp) |
| 780 | { | 788 | { |
| 781 | return __tcp_v4_send_synack(sk, NULL, req, rvp); | 789 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); |
| 790 | return tcp_v4_send_synack(sk, NULL, req, rvp); | ||
| 782 | } | 791 | } |
| 783 | 792 | ||
| 784 | /* | 793 | /* |
| @@ -789,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) | |||
| 789 | kfree(inet_rsk(req)->opt); | 798 | kfree(inet_rsk(req)->opt); |
| 790 | } | 799 | } |
| 791 | 800 | ||
| 792 | #ifdef CONFIG_SYN_COOKIES | 801 | static void syn_flood_warning(const struct sk_buff *skb) |
| 793 | static void syn_flood_warning(struct sk_buff *skb) | ||
| 794 | { | 802 | { |
| 795 | static unsigned long warntime; | 803 | const char *msg; |
| 796 | 804 | ||
| 797 | if (time_after(jiffies, (warntime + HZ * 60))) { | 805 | #ifdef CONFIG_SYN_COOKIES |
| 798 | warntime = jiffies; | 806 | if (sysctl_tcp_syncookies) |
| 799 | printk(KERN_INFO | 807 | msg = "Sending cookies"; |
| 800 | "possible SYN flooding on port %d. Sending cookies.\n", | 808 | else |
| 801 | ntohs(tcp_hdr(skb)->dest)); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | #endif | 809 | #endif |
| 810 | msg = "Dropping request"; | ||
| 811 | |||
| 812 | pr_info("TCP: Possible SYN flooding on port %d. %s.\n", | ||
| 813 | ntohs(tcp_hdr(skb)->dest), msg); | ||
| 814 | } | ||
| 805 | 815 | ||
| 806 | /* | 816 | /* |
| 807 | * Save and compile IPv4 options into the request_sock if needed. | 817 | * Save and compile IPv4 options into the request_sock if needed. |
| @@ -853,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, | |||
| 853 | { | 863 | { |
| 854 | return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); | 864 | return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); |
| 855 | } | 865 | } |
| 856 | |||
| 857 | EXPORT_SYMBOL(tcp_v4_md5_lookup); | 866 | EXPORT_SYMBOL(tcp_v4_md5_lookup); |
| 858 | 867 | ||
| 859 | static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, | 868 | static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, |
| @@ -887,7 +896,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, | |||
| 887 | kfree(newkey); | 896 | kfree(newkey); |
| 888 | return -ENOMEM; | 897 | return -ENOMEM; |
| 889 | } | 898 | } |
| 890 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 899 | sk_nocaps_add(sk, NETIF_F_GSO_MASK); |
| 891 | } | 900 | } |
| 892 | if (tcp_alloc_md5sig_pool(sk) == NULL) { | 901 | if (tcp_alloc_md5sig_pool(sk) == NULL) { |
| 893 | kfree(newkey); | 902 | kfree(newkey); |
| @@ -920,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, | |||
| 920 | } | 929 | } |
| 921 | return 0; | 930 | return 0; |
| 922 | } | 931 | } |
| 923 | |||
| 924 | EXPORT_SYMBOL(tcp_v4_md5_do_add); | 932 | EXPORT_SYMBOL(tcp_v4_md5_do_add); |
| 925 | 933 | ||
| 926 | static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, | 934 | static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, |
| @@ -958,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) | |||
| 958 | } | 966 | } |
| 959 | return -ENOENT; | 967 | return -ENOENT; |
| 960 | } | 968 | } |
| 961 | |||
| 962 | EXPORT_SYMBOL(tcp_v4_md5_do_del); | 969 | EXPORT_SYMBOL(tcp_v4_md5_do_del); |
| 963 | 970 | ||
| 964 | static void tcp_v4_clear_md5_list(struct sock *sk) | 971 | static void tcp_v4_clear_md5_list(struct sock *sk) |
| @@ -1017,7 +1024,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, | |||
| 1017 | return -EINVAL; | 1024 | return -EINVAL; |
| 1018 | 1025 | ||
| 1019 | tp->md5sig_info = p; | 1026 | tp->md5sig_info = p; |
| 1020 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1027 | sk_nocaps_add(sk, NETIF_F_GSO_MASK); |
| 1021 | } | 1028 | } |
| 1022 | 1029 | ||
| 1023 | newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); | 1030 | newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); |
| @@ -1131,7 +1138,6 @@ clear_hash_noput: | |||
| 1131 | memset(md5_hash, 0, 16); | 1138 | memset(md5_hash, 0, 16); |
| 1132 | return 1; | 1139 | return 1; |
| 1133 | } | 1140 | } |
| 1134 | |||
| 1135 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); | 1141 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); |
| 1136 | 1142 | ||
| 1137 | static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) | 1143 | static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) |
| @@ -1192,10 +1198,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) | |||
| 1192 | struct request_sock_ops tcp_request_sock_ops __read_mostly = { | 1198 | struct request_sock_ops tcp_request_sock_ops __read_mostly = { |
| 1193 | .family = PF_INET, | 1199 | .family = PF_INET, |
| 1194 | .obj_size = sizeof(struct tcp_request_sock), | 1200 | .obj_size = sizeof(struct tcp_request_sock), |
| 1195 | .rtx_syn_ack = tcp_v4_send_synack, | 1201 | .rtx_syn_ack = tcp_v4_rtx_synack, |
| 1196 | .send_ack = tcp_v4_reqsk_send_ack, | 1202 | .send_ack = tcp_v4_reqsk_send_ack, |
| 1197 | .destructor = tcp_v4_reqsk_destructor, | 1203 | .destructor = tcp_v4_reqsk_destructor, |
| 1198 | .send_reset = tcp_v4_send_reset, | 1204 | .send_reset = tcp_v4_send_reset, |
| 1205 | .syn_ack_timeout = tcp_syn_ack_timeout, | ||
| 1199 | }; | 1206 | }; |
| 1200 | 1207 | ||
| 1201 | #ifdef CONFIG_TCP_MD5SIG | 1208 | #ifdef CONFIG_TCP_MD5SIG |
| @@ -1238,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1238 | * evidently real one. | 1245 | * evidently real one. |
| 1239 | */ | 1246 | */ |
| 1240 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { | 1247 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
| 1248 | if (net_ratelimit()) | ||
| 1249 | syn_flood_warning(skb); | ||
| 1241 | #ifdef CONFIG_SYN_COOKIES | 1250 | #ifdef CONFIG_SYN_COOKIES |
| 1242 | if (sysctl_tcp_syncookies) { | 1251 | if (sysctl_tcp_syncookies) { |
| 1243 | want_cookie = 1; | 1252 | want_cookie = 1; |
| @@ -1281,8 +1290,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1281 | goto drop_and_release; | 1290 | goto drop_and_release; |
| 1282 | 1291 | ||
| 1283 | /* Secret recipe starts with IP addresses */ | 1292 | /* Secret recipe starts with IP addresses */ |
| 1284 | *mess++ ^= daddr; | 1293 | *mess++ ^= (__force u32)daddr; |
| 1285 | *mess++ ^= saddr; | 1294 | *mess++ ^= (__force u32)saddr; |
| 1286 | 1295 | ||
| 1287 | /* plus variable length Initiator Cookie */ | 1296 | /* plus variable length Initiator Cookie */ |
| 1288 | c = (u8 *)mess; | 1297 | c = (u8 *)mess; |
| @@ -1318,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1318 | if (security_inet_conn_request(sk, skb, req)) | 1327 | if (security_inet_conn_request(sk, skb, req)) |
| 1319 | goto drop_and_free; | 1328 | goto drop_and_free; |
| 1320 | 1329 | ||
| 1321 | if (!want_cookie) | 1330 | if (!want_cookie || tmp_opt.tstamp_ok) |
| 1322 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1331 | TCP_ECN_create_request(req, tcp_hdr(skb)); |
| 1323 | 1332 | ||
| 1324 | if (want_cookie) { | 1333 | if (want_cookie) { |
| 1325 | #ifdef CONFIG_SYN_COOKIES | ||
| 1326 | syn_flood_warning(skb); | ||
| 1327 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
| 1328 | #endif | ||
| 1329 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1334 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
| 1335 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
| 1330 | } else if (!isn) { | 1336 | } else if (!isn) { |
| 1331 | struct inet_peer *peer = NULL; | 1337 | struct inet_peer *peer = NULL; |
| 1332 | 1338 | ||
| @@ -1344,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1344 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1350 | (dst = inet_csk_route_req(sk, req)) != NULL && |
| 1345 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1351 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
| 1346 | peer->v4daddr == saddr) { | 1352 | peer->v4daddr == saddr) { |
| 1353 | inet_peer_refcheck(peer); | ||
| 1347 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1354 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
| 1348 | (s32)(peer->tcp_ts - req->ts_recent) > | 1355 | (s32)(peer->tcp_ts - req->ts_recent) > |
| 1349 | TCP_PAWS_WINDOW) { | 1356 | TCP_PAWS_WINDOW) { |
| @@ -1373,8 +1380,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1373 | } | 1380 | } |
| 1374 | tcp_rsk(req)->snt_isn = isn; | 1381 | tcp_rsk(req)->snt_isn = isn; |
| 1375 | 1382 | ||
| 1376 | if (__tcp_v4_send_synack(sk, dst, req, | 1383 | if (tcp_v4_send_synack(sk, dst, req, |
| 1377 | (struct request_values *)&tmp_ext) || | 1384 | (struct request_values *)&tmp_ext) || |
| 1378 | want_cookie) | 1385 | want_cookie) |
| 1379 | goto drop_and_free; | 1386 | goto drop_and_free; |
| 1380 | 1387 | ||
| @@ -1388,6 +1395,7 @@ drop_and_free: | |||
| 1388 | drop: | 1395 | drop: |
| 1389 | return 0; | 1396 | return 0; |
| 1390 | } | 1397 | } |
| 1398 | EXPORT_SYMBOL(tcp_v4_conn_request); | ||
| 1391 | 1399 | ||
| 1392 | 1400 | ||
| 1393 | /* | 1401 | /* |
| @@ -1457,7 +1465,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
| 1457 | if (newkey != NULL) | 1465 | if (newkey != NULL) |
| 1458 | tcp_v4_md5_do_add(newsk, newinet->inet_daddr, | 1466 | tcp_v4_md5_do_add(newsk, newinet->inet_daddr, |
| 1459 | newkey, key->keylen); | 1467 | newkey, key->keylen); |
| 1460 | newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1468 | sk_nocaps_add(newsk, NETIF_F_GSO_MASK); |
| 1461 | } | 1469 | } |
| 1462 | #endif | 1470 | #endif |
| 1463 | 1471 | ||
| @@ -1473,6 +1481,7 @@ exit: | |||
| 1473 | dst_release(dst); | 1481 | dst_release(dst); |
| 1474 | return NULL; | 1482 | return NULL; |
| 1475 | } | 1483 | } |
| 1484 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | ||
| 1476 | 1485 | ||
| 1477 | static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | 1486 | static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) |
| 1478 | { | 1487 | { |
| @@ -1499,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
| 1499 | } | 1508 | } |
| 1500 | 1509 | ||
| 1501 | #ifdef CONFIG_SYN_COOKIES | 1510 | #ifdef CONFIG_SYN_COOKIES |
| 1502 | if (!th->rst && !th->syn && th->ack) | 1511 | if (!th->syn) |
| 1503 | sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); | 1512 | sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); |
| 1504 | #endif | 1513 | #endif |
| 1505 | return sk; | 1514 | return sk; |
| @@ -1550,6 +1559,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1550 | #endif | 1559 | #endif |
| 1551 | 1560 | ||
| 1552 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1561 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
| 1562 | sock_rps_save_rxhash(sk, skb->rxhash); | ||
| 1553 | TCP_CHECK_TIMER(sk); | 1563 | TCP_CHECK_TIMER(sk); |
| 1554 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { | 1564 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { |
| 1555 | rsk = sk; | 1565 | rsk = sk; |
| @@ -1574,7 +1584,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1574 | } | 1584 | } |
| 1575 | return 0; | 1585 | return 0; |
| 1576 | } | 1586 | } |
| 1577 | } | 1587 | } else |
| 1588 | sock_rps_save_rxhash(sk, skb->rxhash); | ||
| 1589 | |||
| 1578 | 1590 | ||
| 1579 | TCP_CHECK_TIMER(sk); | 1591 | TCP_CHECK_TIMER(sk); |
| 1580 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { | 1592 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { |
| @@ -1599,6 +1611,7 @@ csum_err: | |||
| 1599 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | 1611 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); |
| 1600 | goto discard; | 1612 | goto discard; |
| 1601 | } | 1613 | } |
| 1614 | EXPORT_SYMBOL(tcp_v4_do_rcv); | ||
| 1602 | 1615 | ||
| 1603 | /* | 1616 | /* |
| 1604 | * From tcp_input.c | 1617 | * From tcp_input.c |
| @@ -1653,6 +1666,11 @@ process: | |||
| 1653 | if (sk->sk_state == TCP_TIME_WAIT) | 1666 | if (sk->sk_state == TCP_TIME_WAIT) |
| 1654 | goto do_time_wait; | 1667 | goto do_time_wait; |
| 1655 | 1668 | ||
| 1669 | if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { | ||
| 1670 | NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); | ||
| 1671 | goto discard_and_relse; | ||
| 1672 | } | ||
| 1673 | |||
| 1656 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | 1674 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
| 1657 | goto discard_and_relse; | 1675 | goto discard_and_relse; |
| 1658 | nf_reset(skb); | 1676 | nf_reset(skb); |
| @@ -1677,8 +1695,11 @@ process: | |||
| 1677 | if (!tcp_prequeue(sk, skb)) | 1695 | if (!tcp_prequeue(sk, skb)) |
| 1678 | ret = tcp_v4_do_rcv(sk, skb); | 1696 | ret = tcp_v4_do_rcv(sk, skb); |
| 1679 | } | 1697 | } |
| 1680 | } else | 1698 | } else if (unlikely(sk_add_backlog(sk, skb))) { |
| 1681 | sk_add_backlog(sk, skb); | 1699 | bh_unlock_sock(sk); |
| 1700 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | ||
| 1701 | goto discard_and_relse; | ||
| 1702 | } | ||
| 1682 | bh_unlock_sock(sk); | 1703 | bh_unlock_sock(sk); |
| 1683 | 1704 | ||
| 1684 | sock_put(sk); | 1705 | sock_put(sk); |
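With sk_add_backlog() now able to fail, a socket that is busy in process context no longer queues an unbounded backlog: once the limit is hit, further segments are dropped and accounted under the new TCPBacklogDrop counter (the usual SNMP rendering of LINUX_MIB_TCPBACKLOGDROP, visible in /proc/net/netstat) instead of consuming memory while the owner holds the lock.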
| @@ -1777,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
| 1777 | 1798 | ||
| 1778 | return 0; | 1799 | return 0; |
| 1779 | } | 1800 | } |
| 1801 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | ||
| 1780 | 1802 | ||
| 1781 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | 1803 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) |
| 1782 | { | 1804 | { |
| @@ -1816,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { | |||
| 1816 | .compat_getsockopt = compat_ip_getsockopt, | 1838 | .compat_getsockopt = compat_ip_getsockopt, |
| 1817 | #endif | 1839 | #endif |
| 1818 | }; | 1840 | }; |
| 1841 | EXPORT_SYMBOL(ipv4_specific); | ||
| 1819 | 1842 | ||
| 1820 | #ifdef CONFIG_TCP_MD5SIG | 1843 | #ifdef CONFIG_TCP_MD5SIG |
| 1821 | static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { | 1844 | static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { |
| @@ -1944,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
| 1944 | 1967 | ||
| 1945 | percpu_counter_dec(&tcp_sockets_allocated); | 1968 | percpu_counter_dec(&tcp_sockets_allocated); |
| 1946 | } | 1969 | } |
| 1947 | |||
| 1948 | EXPORT_SYMBOL(tcp_v4_destroy_sock); | 1970 | EXPORT_SYMBOL(tcp_v4_destroy_sock); |
| 1949 | 1971 | ||
| 1950 | #ifdef CONFIG_PROC_FS | 1972 | #ifdef CONFIG_PROC_FS |
| @@ -1962,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) | |||
| 1962 | hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; | 1984 | hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; |
| 1963 | } | 1985 | } |
| 1964 | 1986 | ||
| 1987 | /* | ||
| 1988 | * Get the next listener socket following cur. If cur is NULL, get the first socket | ||
| 1989 | * starting from the bucket given in st->bucket; when st->bucket is zero, the | ||
| 1990 | * very first socket in the hash table is returned. | ||
| 1991 | */ | ||
| 1965 | static void *listening_get_next(struct seq_file *seq, void *cur) | 1992 | static void *listening_get_next(struct seq_file *seq, void *cur) |
| 1966 | { | 1993 | { |
| 1967 | struct inet_connection_sock *icsk; | 1994 | struct inet_connection_sock *icsk; |
| @@ -1972,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
| 1972 | struct net *net = seq_file_net(seq); | 1999 | struct net *net = seq_file_net(seq); |
| 1973 | 2000 | ||
| 1974 | if (!sk) { | 2001 | if (!sk) { |
| 1975 | st->bucket = 0; | 2002 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
| 1976 | ilb = &tcp_hashinfo.listening_hash[0]; | ||
| 1977 | spin_lock_bh(&ilb->lock); | 2003 | spin_lock_bh(&ilb->lock); |
| 1978 | sk = sk_nulls_head(&ilb->head); | 2004 | sk = sk_nulls_head(&ilb->head); |
| 2005 | st->offset = 0; | ||
| 1979 | goto get_sk; | 2006 | goto get_sk; |
| 1980 | } | 2007 | } |
| 1981 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; | 2008 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
| 1982 | ++st->num; | 2009 | ++st->num; |
| 2010 | ++st->offset; | ||
| 1983 | 2011 | ||
| 1984 | if (st->state == TCP_SEQ_STATE_OPENREQ) { | 2012 | if (st->state == TCP_SEQ_STATE_OPENREQ) { |
| 1985 | struct request_sock *req = cur; | 2013 | struct request_sock *req = cur; |
| @@ -1994,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
| 1994 | } | 2022 | } |
| 1995 | req = req->dl_next; | 2023 | req = req->dl_next; |
| 1996 | } | 2024 | } |
| 2025 | st->offset = 0; | ||
| 1997 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) | 2026 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) |
| 1998 | break; | 2027 | break; |
| 1999 | get_req: | 2028 | get_req: |
| @@ -2029,6 +2058,7 @@ start_req: | |||
| 2029 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 2058 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
| 2030 | } | 2059 | } |
| 2031 | spin_unlock_bh(&ilb->lock); | 2060 | spin_unlock_bh(&ilb->lock); |
| 2061 | st->offset = 0; | ||
| 2032 | if (++st->bucket < INET_LHTABLE_SIZE) { | 2062 | if (++st->bucket < INET_LHTABLE_SIZE) { |
| 2033 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; | 2063 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
| 2034 | spin_lock_bh(&ilb->lock); | 2064 | spin_lock_bh(&ilb->lock); |
| @@ -2042,7 +2072,12 @@ out: | |||
| 2042 | 2072 | ||
| 2043 | static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | 2073 | static void *listening_get_idx(struct seq_file *seq, loff_t *pos) |
| 2044 | { | 2074 | { |
| 2045 | void *rc = listening_get_next(seq, NULL); | 2075 | struct tcp_iter_state *st = seq->private; |
| 2076 | void *rc; | ||
| 2077 | |||
| 2078 | st->bucket = 0; | ||
| 2079 | st->offset = 0; | ||
| 2080 | rc = listening_get_next(seq, NULL); | ||
| 2046 | 2081 | ||
| 2047 | while (rc && *pos) { | 2082 | while (rc && *pos) { |
| 2048 | rc = listening_get_next(seq, rc); | 2083 | rc = listening_get_next(seq, rc); |
| @@ -2057,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st) | |||
| 2057 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | 2092 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); |
| 2058 | } | 2093 | } |
| 2059 | 2094 | ||
| 2095 | /* | ||
| 2096 | * Get the first established socket, starting from the bucket given in st->bucket. | ||
| 2097 | * If st->bucket is zero, the very first socket in the hash is returned. | ||
| 2098 | */ | ||
| 2060 | static void *established_get_first(struct seq_file *seq) | 2099 | static void *established_get_first(struct seq_file *seq) |
| 2061 | { | 2100 | { |
| 2062 | struct tcp_iter_state *st = seq->private; | 2101 | struct tcp_iter_state *st = seq->private; |
| 2063 | struct net *net = seq_file_net(seq); | 2102 | struct net *net = seq_file_net(seq); |
| 2064 | void *rc = NULL; | 2103 | void *rc = NULL; |
| 2065 | 2104 | ||
| 2066 | for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { | 2105 | st->offset = 0; |
| 2106 | for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { | ||
| 2067 | struct sock *sk; | 2107 | struct sock *sk; |
| 2068 | struct hlist_nulls_node *node; | 2108 | struct hlist_nulls_node *node; |
| 2069 | struct inet_timewait_sock *tw; | 2109 | struct inet_timewait_sock *tw; |
| @@ -2108,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) | |||
| 2108 | struct net *net = seq_file_net(seq); | 2148 | struct net *net = seq_file_net(seq); |
| 2109 | 2149 | ||
| 2110 | ++st->num; | 2150 | ++st->num; |
| 2151 | ++st->offset; | ||
| 2111 | 2152 | ||
| 2112 | if (st->state == TCP_SEQ_STATE_TIME_WAIT) { | 2153 | if (st->state == TCP_SEQ_STATE_TIME_WAIT) { |
| 2113 | tw = cur; | 2154 | tw = cur; |
| @@ -2124,6 +2165,7 @@ get_tw: | |||
| 2124 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2165 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
| 2125 | 2166 | ||
| 2126 | /* Look for next non empty bucket */ | 2167 | /* Look for next non empty bucket */ |
| 2168 | st->offset = 0; | ||
| 2127 | while (++st->bucket <= tcp_hashinfo.ehash_mask && | 2169 | while (++st->bucket <= tcp_hashinfo.ehash_mask && |
| 2128 | empty_bucket(st)) | 2170 | empty_bucket(st)) |
| 2129 | ; | 2171 | ; |
| @@ -2151,7 +2193,11 @@ out: | |||
| 2151 | 2193 | ||
| 2152 | static void *established_get_idx(struct seq_file *seq, loff_t pos) | 2194 | static void *established_get_idx(struct seq_file *seq, loff_t pos) |
| 2153 | { | 2195 | { |
| 2154 | void *rc = established_get_first(seq); | 2196 | struct tcp_iter_state *st = seq->private; |
| 2197 | void *rc; | ||
| 2198 | |||
| 2199 | st->bucket = 0; | ||
| 2200 | rc = established_get_first(seq); | ||
| 2155 | 2201 | ||
| 2156 | while (rc && pos) { | 2202 | while (rc && pos) { |
| 2157 | rc = established_get_next(seq, rc); | 2203 | rc = established_get_next(seq, rc); |
| @@ -2176,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | |||
| 2176 | return rc; | 2222 | return rc; |
| 2177 | } | 2223 | } |
| 2178 | 2224 | ||
| 2225 | static void *tcp_seek_last_pos(struct seq_file *seq) | ||
| 2226 | { | ||
| 2227 | struct tcp_iter_state *st = seq->private; | ||
| 2228 | int offset = st->offset; | ||
| 2229 | int orig_num = st->num; | ||
| 2230 | void *rc = NULL; | ||
| 2231 | |||
| 2232 | switch (st->state) { | ||
| 2233 | case TCP_SEQ_STATE_OPENREQ: | ||
| 2234 | case TCP_SEQ_STATE_LISTENING: | ||
| 2235 | if (st->bucket >= INET_LHTABLE_SIZE) | ||
| 2236 | break; | ||
| 2237 | st->state = TCP_SEQ_STATE_LISTENING; | ||
| 2238 | rc = listening_get_next(seq, NULL); | ||
| 2239 | while (offset-- && rc) | ||
| 2240 | rc = listening_get_next(seq, rc); | ||
| 2241 | if (rc) | ||
| 2242 | break; | ||
| 2243 | st->bucket = 0; | ||
| 2244 | /* Fallthrough */ | ||
| 2245 | case TCP_SEQ_STATE_ESTABLISHED: | ||
| 2246 | case TCP_SEQ_STATE_TIME_WAIT: | ||
| 2247 | st->state = TCP_SEQ_STATE_ESTABLISHED; | ||
| 2248 | if (st->bucket > tcp_hashinfo.ehash_mask) | ||
| 2249 | break; | ||
| 2250 | rc = established_get_first(seq); | ||
| 2251 | while (offset-- && rc) | ||
| 2252 | rc = established_get_next(seq, rc); | ||
| 2253 | } | ||
| 2254 | |||
| 2255 | st->num = orig_num; | ||
| 2256 | |||
| 2257 | return rc; | ||
| 2258 | } | ||
| 2259 | |||
| 2179 | static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) | 2260 | static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) |
| 2180 | { | 2261 | { |
| 2181 | struct tcp_iter_state *st = seq->private; | 2262 | struct tcp_iter_state *st = seq->private; |
| 2263 | void *rc; | ||
| 2264 | |||
| 2265 | if (*pos && *pos == st->last_pos) { | ||
| 2266 | rc = tcp_seek_last_pos(seq); | ||
| 2267 | if (rc) | ||
| 2268 | goto out; | ||
| 2269 | } | ||
| 2270 | |||
| 2182 | st->state = TCP_SEQ_STATE_LISTENING; | 2271 | st->state = TCP_SEQ_STATE_LISTENING; |
| 2183 | st->num = 0; | 2272 | st->num = 0; |
| 2184 | return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2273 | st->bucket = 0; |
| 2274 | st->offset = 0; | ||
| 2275 | rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | ||
| 2276 | |||
| 2277 | out: | ||
| 2278 | st->last_pos = *pos; | ||
| 2279 | return rc; | ||
| 2185 | } | 2280 | } |
| 2186 | 2281 | ||
| 2187 | static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2282 | static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
| 2188 | { | 2283 | { |
| 2284 | struct tcp_iter_state *st = seq->private; | ||
| 2189 | void *rc = NULL; | 2285 | void *rc = NULL; |
| 2190 | struct tcp_iter_state *st; | ||
| 2191 | 2286 | ||
| 2192 | if (v == SEQ_START_TOKEN) { | 2287 | if (v == SEQ_START_TOKEN) { |
| 2193 | rc = tcp_get_idx(seq, 0); | 2288 | rc = tcp_get_idx(seq, 0); |
| 2194 | goto out; | 2289 | goto out; |
| 2195 | } | 2290 | } |
| 2196 | st = seq->private; | ||
| 2197 | 2291 | ||
| 2198 | switch (st->state) { | 2292 | switch (st->state) { |
| 2199 | case TCP_SEQ_STATE_OPENREQ: | 2293 | case TCP_SEQ_STATE_OPENREQ: |
| @@ -2201,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 2201 | rc = listening_get_next(seq, v); | 2295 | rc = listening_get_next(seq, v); |
| 2202 | if (!rc) { | 2296 | if (!rc) { |
| 2203 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2297 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
| 2298 | st->bucket = 0; | ||
| 2299 | st->offset = 0; | ||
| 2204 | rc = established_get_first(seq); | 2300 | rc = established_get_first(seq); |
| 2205 | } | 2301 | } |
| 2206 | break; | 2302 | break; |
| @@ -2211,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 2211 | } | 2307 | } |
| 2212 | out: | 2308 | out: |
| 2213 | ++*pos; | 2309 | ++*pos; |
| 2310 | st->last_pos = *pos; | ||
| 2214 | return rc; | 2311 | return rc; |
| 2215 | } | 2312 | } |
| 2216 | 2313 | ||
| @@ -2249,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file) | |||
| 2249 | 2346 | ||
| 2250 | s = ((struct seq_file *)file->private_data)->private; | 2347 | s = ((struct seq_file *)file->private_data)->private; |
| 2251 | s->family = afinfo->family; | 2348 | s->family = afinfo->family; |
| 2349 | s->last_pos = 0; | ||
| 2252 | return 0; | 2350 | return 0; |
| 2253 | } | 2351 | } |
| 2254 | 2352 | ||
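The last_pos bookkeeping above addresses a quadratic-walk problem in /proc/net/tcp: each read(2) on the file re-enters tcp_seq_start() at the position where the previous read stopped, and the iterator used to replay every bucket from zero to get back there. By caching the bucket and in-bucket offset, tcp_seek_last_pos() resumes the walk in place. The effect is easy to provoke with deliberately small reads (a plain userspace sketch):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[1024];         /* tiny reads force many seq_file restarts */
            ssize_t n;
            int fd = open("/proc/net/tcp", O_RDONLY);

            if (fd < 0)
                    return 1;
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, (size_t)n, stdout);
            close(fd);
            return 0;
    }

On a machine with very many sockets, each of those read() calls previously rescanned the hash from the beginning; with st->last_pos the cost of a full dump becomes linear in the table size.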
| @@ -2272,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) | |||
| 2272 | rc = -ENOMEM; | 2370 | rc = -ENOMEM; |
| 2273 | return rc; | 2371 | return rc; |
| 2274 | } | 2372 | } |
| 2373 | EXPORT_SYMBOL(tcp_proc_register); | ||
| 2275 | 2374 | ||
| 2276 | void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) | 2375 | void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) |
| 2277 | { | 2376 | { |
| 2278 | proc_net_remove(net, afinfo->name); | 2377 | proc_net_remove(net, afinfo->name); |
| 2279 | } | 2378 | } |
| 2379 | EXPORT_SYMBOL(tcp_proc_unregister); | ||
| 2280 | 2380 | ||
| 2281 | static void get_openreq4(struct sock *sk, struct request_sock *req, | 2381 | static void get_openreq4(struct sock *sk, struct request_sock *req, |
| 2282 | struct seq_file *f, int i, int uid, int *len) | 2382 | struct seq_file *f, int i, int uid, int *len) |
| @@ -2425,12 +2525,12 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = { | |||
| 2425 | }, | 2525 | }, |
| 2426 | }; | 2526 | }; |
| 2427 | 2527 | ||
| 2428 | static int tcp4_proc_init_net(struct net *net) | 2528 | static int __net_init tcp4_proc_init_net(struct net *net) |
| 2429 | { | 2529 | { |
| 2430 | return tcp_proc_register(net, &tcp4_seq_afinfo); | 2530 | return tcp_proc_register(net, &tcp4_seq_afinfo); |
| 2431 | } | 2531 | } |
| 2432 | 2532 | ||
| 2433 | static void tcp4_proc_exit_net(struct net *net) | 2533 | static void __net_exit tcp4_proc_exit_net(struct net *net) |
| 2434 | { | 2534 | { |
| 2435 | tcp_proc_unregister(net, &tcp4_seq_afinfo); | 2535 | tcp_proc_unregister(net, &tcp4_seq_afinfo); |
| 2436 | } | 2536 | } |
| @@ -2500,6 +2600,8 @@ struct proto tcp_prot = { | |||
| 2500 | .setsockopt = tcp_setsockopt, | 2600 | .setsockopt = tcp_setsockopt, |
| 2501 | .getsockopt = tcp_getsockopt, | 2601 | .getsockopt = tcp_getsockopt, |
| 2502 | .recvmsg = tcp_recvmsg, | 2602 | .recvmsg = tcp_recvmsg, |
| 2603 | .sendmsg = tcp_sendmsg, | ||
| 2604 | .sendpage = tcp_sendpage, | ||
| 2503 | .backlog_rcv = tcp_v4_do_rcv, | 2605 | .backlog_rcv = tcp_v4_do_rcv, |
| 2504 | .hash = inet_hash, | 2606 | .hash = inet_hash, |
| 2505 | .unhash = inet_unhash, | 2607 | .unhash = inet_unhash, |
| @@ -2518,11 +2620,13 @@ struct proto tcp_prot = { | |||
| 2518 | .twsk_prot = &tcp_timewait_sock_ops, | 2620 | .twsk_prot = &tcp_timewait_sock_ops, |
| 2519 | .rsk_prot = &tcp_request_sock_ops, | 2621 | .rsk_prot = &tcp_request_sock_ops, |
| 2520 | .h.hashinfo = &tcp_hashinfo, | 2622 | .h.hashinfo = &tcp_hashinfo, |
| 2623 | .no_autobind = true, | ||
| 2521 | #ifdef CONFIG_COMPAT | 2624 | #ifdef CONFIG_COMPAT |
| 2522 | .compat_setsockopt = compat_tcp_setsockopt, | 2625 | .compat_setsockopt = compat_tcp_setsockopt, |
| 2523 | .compat_getsockopt = compat_tcp_getsockopt, | 2626 | .compat_getsockopt = compat_tcp_getsockopt, |
| 2524 | #endif | 2627 | #endif |
| 2525 | }; | 2628 | }; |
| 2629 | EXPORT_SYMBOL(tcp_prot); | ||
| 2526 | 2630 | ||
| 2527 | 2631 | ||
| 2528 | static int __net_init tcp_sk_init(struct net *net) | 2632 | static int __net_init tcp_sk_init(struct net *net) |
| @@ -2553,20 +2657,3 @@ void __init tcp_v4_init(void) | |||
| 2553 | if (register_pernet_subsys(&tcp_sk_ops)) | 2657 | if (register_pernet_subsys(&tcp_sk_ops)) |
| 2554 | panic("Failed to create the TCP control socket.\n"); | 2658 | panic("Failed to create the TCP control socket.\n"); |
| 2555 | } | 2659 | } |
| 2556 | |||
| 2557 | EXPORT_SYMBOL(ipv4_specific); | ||
| 2558 | EXPORT_SYMBOL(tcp_hashinfo); | ||
| 2559 | EXPORT_SYMBOL(tcp_prot); | ||
| 2560 | EXPORT_SYMBOL(tcp_v4_conn_request); | ||
| 2561 | EXPORT_SYMBOL(tcp_v4_connect); | ||
| 2562 | EXPORT_SYMBOL(tcp_v4_do_rcv); | ||
| 2563 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | ||
| 2564 | EXPORT_SYMBOL(tcp_v4_send_check); | ||
| 2565 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | ||
| 2566 | |||
| 2567 | #ifdef CONFIG_PROC_FS | ||
| 2568 | EXPORT_SYMBOL(tcp_proc_register); | ||
| 2569 | EXPORT_SYMBOL(tcp_proc_unregister); | ||
| 2570 | #endif | ||
| 2571 | EXPORT_SYMBOL(sysctl_tcp_low_latency); | ||
| 2572 | |||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f206ee5dda80..f25b56cb85cb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
| @@ -20,6 +20,7 @@ | |||
| 20 | 20 | ||
| 21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
| 22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
| 23 | #include <linux/slab.h> | ||
| 23 | #include <linux/sysctl.h> | 24 | #include <linux/sysctl.h> |
| 24 | #include <linux/workqueue.h> | 25 | #include <linux/workqueue.h> |
| 25 | #include <net/tcp.h> | 26 | #include <net/tcp.h> |
| @@ -46,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = { | |||
| 46 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, | 47 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, |
| 47 | (unsigned long)&tcp_death_row), | 48 | (unsigned long)&tcp_death_row), |
| 48 | }; | 49 | }; |
| 49 | |||
| 50 | EXPORT_SYMBOL_GPL(tcp_death_row); | 50 | EXPORT_SYMBOL_GPL(tcp_death_row); |
| 51 | 51 | ||
| 52 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 52 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
| @@ -261,6 +261,7 @@ kill: | |||
| 261 | inet_twsk_put(tw); | 261 | inet_twsk_put(tw); |
| 262 | return TCP_TW_SUCCESS; | 262 | return TCP_TW_SUCCESS; |
| 263 | } | 263 | } |
| 264 | EXPORT_SYMBOL(tcp_timewait_state_process); | ||
| 264 | 265 | ||
| 265 | /* | 266 | /* |
| 266 | * Move a socket to time-wait or dead fin-wait-2 state. | 267 | * Move a socket to time-wait or dead fin-wait-2 state. |
| @@ -361,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk) | |||
| 361 | tcp_free_md5sig_pool(); | 362 | tcp_free_md5sig_pool(); |
| 362 | #endif | 363 | #endif |
| 363 | } | 364 | } |
| 364 | |||
| 365 | EXPORT_SYMBOL_GPL(tcp_twsk_destructor); | 365 | EXPORT_SYMBOL_GPL(tcp_twsk_destructor); |
| 366 | 366 | ||
| 367 | static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, | 367 | static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, |
| @@ -509,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
| 509 | } | 509 | } |
| 510 | return newsk; | 510 | return newsk; |
| 511 | } | 511 | } |
| 512 | EXPORT_SYMBOL(tcp_create_openreq_child); | ||
| 512 | 513 | ||
| 513 | /* | 514 | /* |
| 514 | * Process an incoming packet for SYN_RECV sockets represented | 515 | * Process an incoming packet for SYN_RECV sockets represented |
| @@ -671,6 +672,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
| 671 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && | 672 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && |
| 672 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | 673 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { |
| 673 | inet_rsk(req)->acked = 1; | 674 | inet_rsk(req)->acked = 1; |
| 675 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); | ||
| 674 | return NULL; | 676 | return NULL; |
| 675 | } | 677 | } |
| 676 | 678 | ||
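The new LINUX_MIB_TCPDEFERACCEPTDROP counter makes these previously silent drops visible in /proc/net/netstat. They occur on listeners using deferred accept, a per-socket option; a minimal usage fragment (standard sockets API, not part of this diff; listen_fd stands for any listening TCP socket):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>

    /* Drop the bare third-step ACK and wake accept() only once data
     * arrives (or the defer period lapses); such drops are what the
     * counter added above accounts. */
    int secs = 5;
    setsockopt(listen_fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, sizeof(secs));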
| @@ -704,6 +706,7 @@ embryonic_reset: | |||
| 704 | inet_csk_reqsk_queue_drop(sk, req, prev); | 706 | inet_csk_reqsk_queue_drop(sk, req, prev); |
| 705 | return NULL; | 707 | return NULL; |
| 706 | } | 708 | } |
| 709 | EXPORT_SYMBOL(tcp_check_req); | ||
| 707 | 710 | ||
| 708 | /* | 711 | /* |
| 709 | * Queue segment on the new socket if the new socket is active, | 712 | * Queue segment on the new socket if the new socket is active, |
| @@ -728,15 +731,11 @@ int tcp_child_process(struct sock *parent, struct sock *child, | |||
| 728 | * in main socket hash table and lock on listening | 731 | * in main socket hash table and lock on listening |
| 729 | * socket does not protect us more. | 732 | * socket does not protect us more. |
| 730 | */ | 733 | */ |
| 731 | sk_add_backlog(child, skb); | 734 | __sk_add_backlog(child, skb); |
| 732 | } | 735 | } |
| 733 | 736 | ||
| 734 | bh_unlock_sock(child); | 737 | bh_unlock_sock(child); |
| 735 | sock_put(child); | 738 | sock_put(child); |
| 736 | return ret; | 739 | return ret; |
| 737 | } | 740 | } |
| 738 | |||
| 739 | EXPORT_SYMBOL(tcp_check_req); | ||
| 740 | EXPORT_SYMBOL(tcp_child_process); | 741 | EXPORT_SYMBOL(tcp_child_process); |
| 741 | EXPORT_SYMBOL(tcp_create_openreq_child); | ||
| 742 | EXPORT_SYMBOL(tcp_timewait_state_process); | ||
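The switch from sk_add_backlog() to __sk_add_backlog() tracks a tree-wide rename: the plain name now denotes a variant that can refuse packets once the backlog is full, while the double-underscore form queues unconditionally. A freshly created child socket cannot have accumulated a backlog yet, so the unlimited form is safe here. A sketch of the distinction, assuming the include/net/sock.h helpers of this series (not verbatim):

    /* Unlimited: caller knows the backlog cannot be overcommitted. */
    static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
    {
            if (!sk->sk_backlog.tail)
                    sk->sk_backlog.head = skb;
            else
                    sk->sk_backlog.tail->next = skb;
            sk->sk_backlog.tail = skb;
            skb->next = NULL;
    }

    /* Limited: refuse once queued bytes would exceed the socket's budget. */
    static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
    {
            if (sk->sk_backlog.len + skb->truesize > sk->sk_rcvbuf) /* sketch */
                    return -ENOBUFS;
            __sk_add_backlog(sk, skb);
            sk->sk_backlog.len += skb->truesize; /* sketch */
            return 0;
    }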
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383ce237640f..de3bd8458588 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <net/tcp.h> | 37 | #include <net/tcp.h> |
| 38 | 38 | ||
| 39 | #include <linux/compiler.h> | 39 | #include <linux/compiler.h> |
| 40 | #include <linux/gfp.h> | ||
| 40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
| 41 | 42 | ||
| 42 | /* People can turn this off for buggy TCP's found in printers etc. */ | 43 | /* People can turn this off for buggy TCP's found in printers etc. */ |
| @@ -183,7 +184,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) | |||
| 183 | */ | 184 | */ |
| 184 | void tcp_select_initial_window(int __space, __u32 mss, | 185 | void tcp_select_initial_window(int __space, __u32 mss, |
| 185 | __u32 *rcv_wnd, __u32 *window_clamp, | 186 | __u32 *rcv_wnd, __u32 *window_clamp, |
| 186 | int wscale_ok, __u8 *rcv_wscale) | 187 | int wscale_ok, __u8 *rcv_wscale, |
| 188 | __u32 init_rcv_wnd) | ||
| 187 | { | 189 | { |
| 188 | unsigned int space = (__space < 0 ? 0 : __space); | 190 | unsigned int space = (__space < 0 ? 0 : __space); |
| 189 | 191 | ||
| @@ -232,13 +234,20 @@ void tcp_select_initial_window(int __space, __u32 mss, | |||
| 232 | init_cwnd = 2; | 234 | init_cwnd = 2; |
| 233 | else if (mss > 1460) | 235 | else if (mss > 1460) |
| 234 | init_cwnd = 3; | 236 | init_cwnd = 3; |
| 235 | if (*rcv_wnd > init_cwnd * mss) | 237 | /* when initializing use the value from init_rcv_wnd |
| 238 | * rather than the default from above | ||
| 239 | */ | ||
| 240 | if (init_rcv_wnd && | ||
| 241 | (*rcv_wnd > init_rcv_wnd * mss)) | ||
| 242 | *rcv_wnd = init_rcv_wnd * mss; | ||
| 243 | else if (*rcv_wnd > init_cwnd * mss) | ||
| 236 | *rcv_wnd = init_cwnd * mss; | 244 | *rcv_wnd = init_cwnd * mss; |
| 237 | } | 245 | } |
| 238 | 246 | ||
| 239 | /* Set the clamp no higher than max representable value */ | 247 | /* Set the clamp no higher than max representable value */ |
| 240 | (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); | 248 | (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); |
| 241 | } | 249 | } |
| 250 | EXPORT_SYMBOL(tcp_select_initial_window); | ||
| 242 | 251 | ||
| 243 | /* Chose a new window to advertise, update state in tcp_sock for the | 252 | /* Chose a new window to advertise, update state in tcp_sock for the |
| 244 | * socket, and return result with RFC1323 scaling applied. The return | 253 | * socket, and return result with RFC1323 scaling applied. The return |
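The extra init_rcv_wnd parameter lets a per-route metric (RTAX_INITRWND, settable from userspace — with a new enough iproute2 via `ip route ... initrwnd N`, an assumption of this note) override the MSS-derived default, while still only ever shrinking the window computed so far. A condensed sketch of the selection rule, not verbatim kernel code:

    /* init_rcv_wnd is in segments, as read from dst_metric(dst, RTAX_INITRWND);
     * zero means "no route override, keep the init_cwnd-based default". */
    static void pick_initial_rwnd(__u32 *rcv_wnd, __u32 mss,
                                  __u32 init_cwnd, __u32 init_rcv_wnd)
    {
            if (init_rcv_wnd && *rcv_wnd > init_rcv_wnd * mss)
                    *rcv_wnd = init_rcv_wnd * mss;
            else if (*rcv_wnd > init_cwnd * mss)
                    *rcv_wnd = init_cwnd * mss;
    }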
| @@ -286,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk) | |||
| 286 | /* Packet ECN state for a SYN-ACK */ | 295 | /* Packet ECN state for a SYN-ACK */ |
| 287 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) | 296 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) |
| 288 | { | 297 | { |
| 289 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; | 298 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; |
| 290 | if (!(tp->ecn_flags & TCP_ECN_OK)) | 299 | if (!(tp->ecn_flags & TCP_ECN_OK)) |
| 291 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; | 300 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; |
| 292 | } | 301 | } |
| 293 | 302 | ||
| 294 | /* Packet ECN state for a SYN. */ | 303 | /* Packet ECN state for a SYN. */ |
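The TCPCB_FLAG_* constants used throughout this file are being renamed to TCPHDR_*; both encode the literal TCP header flag bits, so the substitution is mechanical. For reference, the expected definitions (assumed to land in include/net/tcp.h with this series):

    #define TCPHDR_FIN 0x01
    #define TCPHDR_SYN 0x02
    #define TCPHDR_RST 0x04
    #define TCPHDR_PSH 0x08
    #define TCPHDR_ACK 0x10
    #define TCPHDR_URG 0x20
    #define TCPHDR_ECE 0x40
    #define TCPHDR_CWR 0x80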
| @@ -298,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) | |||
| 298 | 307 | ||
| 299 | tp->ecn_flags = 0; | 308 | tp->ecn_flags = 0; |
| 300 | if (sysctl_tcp_ecn == 1) { | 309 | if (sysctl_tcp_ecn == 1) { |
| 301 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; | 310 | TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; |
| 302 | tp->ecn_flags = TCP_ECN_OK; | 311 | tp->ecn_flags = TCP_ECN_OK; |
| 303 | } | 312 | } |
| 304 | } | 313 | } |
| @@ -342,6 +351,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | |||
| 342 | */ | 351 | */ |
| 343 | static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | 352 | static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) |
| 344 | { | 353 | { |
| 354 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
| 345 | skb->csum = 0; | 355 | skb->csum = 0; |
| 346 | 356 | ||
| 347 | TCP_SKB_CB(skb)->flags = flags; | 357 | TCP_SKB_CB(skb)->flags = flags; |
| @@ -352,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
| 352 | skb_shinfo(skb)->gso_type = 0; | 362 | skb_shinfo(skb)->gso_type = 0; |
| 353 | 363 | ||
| 354 | TCP_SKB_CB(skb)->seq = seq; | 364 | TCP_SKB_CB(skb)->seq = seq; |
| 355 | if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) | 365 | if (flags & (TCPHDR_SYN | TCPHDR_FIN)) |
| 356 | seq++; | 366 | seq++; |
| 357 | TCP_SKB_CB(skb)->end_seq = seq; | 367 | TCP_SKB_CB(skb)->end_seq = seq; |
| 358 | } | 368 | } |
| @@ -659,7 +669,6 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
| 659 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | 669 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? |
| 660 | xvp->cookie_plus : | 670 | xvp->cookie_plus : |
| 661 | 0; | 671 | 0; |
| 662 | bool doing_ts = ireq->tstamp_ok; | ||
| 663 | 672 | ||
| 664 | #ifdef CONFIG_TCP_MD5SIG | 673 | #ifdef CONFIG_TCP_MD5SIG |
| 665 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); | 674 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); |
| @@ -672,7 +681,7 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
| 672 | * rather than TS in order to fit in better with old, | 681 | * rather than TS in order to fit in better with old, |
| 673 | * buggy kernels, but that was deemed to be unnecessary. | 682 | * buggy kernels, but that was deemed to be unnecessary. |
| 674 | */ | 683 | */ |
| 675 | doing_ts &= !ireq->sack_ok; | 684 | ireq->tstamp_ok &= !ireq->sack_ok; |
| 676 | } | 685 | } |
| 677 | #else | 686 | #else |
| 678 | *md5 = NULL; | 687 | *md5 = NULL; |
| @@ -687,7 +696,7 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
| 687 | opts->options |= OPTION_WSCALE; | 696 | opts->options |= OPTION_WSCALE; |
| 688 | remaining -= TCPOLEN_WSCALE_ALIGNED; | 697 | remaining -= TCPOLEN_WSCALE_ALIGNED; |
| 689 | } | 698 | } |
| 690 | if (likely(doing_ts)) { | 699 | if (likely(ireq->tstamp_ok)) { |
| 691 | opts->options |= OPTION_TS; | 700 | opts->options |= OPTION_TS; |
| 692 | opts->tsval = TCP_SKB_CB(skb)->when; | 701 | opts->tsval = TCP_SKB_CB(skb)->when; |
| 693 | opts->tsecr = req->ts_recent; | 702 | opts->tsecr = req->ts_recent; |
| @@ -695,7 +704,7 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
| 695 | } | 704 | } |
| 696 | if (likely(ireq->sack_ok)) { | 705 | if (likely(ireq->sack_ok)) { |
| 697 | opts->options |= OPTION_SACK_ADVERTISE; | 706 | opts->options |= OPTION_SACK_ADVERTISE; |
| 698 | if (unlikely(!doing_ts)) | 707 | if (unlikely(!ireq->tstamp_ok)) |
| 699 | remaining -= TCPOLEN_SACKPERM_ALIGNED; | 708 | remaining -= TCPOLEN_SACKPERM_ALIGNED; |
| 700 | } | 709 | } |
| 701 | 710 | ||
| @@ -703,7 +712,7 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
| 703 | * If the <SYN> options fit, the same options should fit now! | 712 | * If the <SYN> options fit, the same options should fit now! |
| 704 | */ | 713 | */ |
| 705 | if (*md5 == NULL && | 714 | if (*md5 == NULL && |
| 706 | doing_ts && | 715 | ireq->tstamp_ok && |
| 707 | cookie_plus > TCPOLEN_COOKIE_BASE) { | 716 | cookie_plus > TCPOLEN_COOKIE_BASE) { |
| 708 | int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ | 717 | int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ |
| 709 | 718 | ||
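Writing the timestamp decision back into ireq->tstamp_ok, rather than keeping it in the dropped function-local doing_ts, means the request sock now records the same choice the option-space accounting above was based on, so later option emission cannot disagree with it. In outline (a hypothetical condensation, not diff text):

    /* With cookie extensions plus SACK, give up the TS option and make
     * that choice durable for everyone who later consults the ireq. */
    if (ireq->sack_ok)
            ireq->tstamp_ok = 0;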
| @@ -812,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 812 | tcb = TCP_SKB_CB(skb); | 821 | tcb = TCP_SKB_CB(skb); |
| 813 | memset(&opts, 0, sizeof(opts)); | 822 | memset(&opts, 0, sizeof(opts)); |
| 814 | 823 | ||
| 815 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) | 824 | if (unlikely(tcb->flags & TCPHDR_SYN)) |
| 816 | tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); | 825 | tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); |
| 817 | else | 826 | else |
| 818 | tcp_options_size = tcp_established_options(sk, skb, &opts, | 827 | tcp_options_size = tcp_established_options(sk, skb, &opts, |
| @@ -835,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 835 | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | | 844 | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | |
| 836 | tcb->flags); | 845 | tcb->flags); |
| 837 | 846 | ||
| 838 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 847 | if (unlikely(tcb->flags & TCPHDR_SYN)) { |
| 839 | /* RFC1323: The window in SYN & SYN/ACK segments | 848 | /* RFC1323: The window in SYN & SYN/ACK segments |
| 840 | * is never scaled. | 849 | * is never scaled. |
| 841 | */ | 850 | */ |
| @@ -852,36 +861,37 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 852 | th->urg_ptr = htons(tp->snd_up - tcb->seq); | 861 | th->urg_ptr = htons(tp->snd_up - tcb->seq); |
| 853 | th->urg = 1; | 862 | th->urg = 1; |
| 854 | } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { | 863 | } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { |
| 855 | th->urg_ptr = 0xFFFF; | 864 | th->urg_ptr = htons(0xFFFF); |
| 856 | th->urg = 1; | 865 | th->urg = 1; |
| 857 | } | 866 | } |
| 858 | } | 867 | } |
| 859 | 868 | ||
| 860 | tcp_options_write((__be32 *)(th + 1), tp, &opts); | 869 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
| 861 | if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) | 870 | if (likely((tcb->flags & TCPHDR_SYN) == 0)) |
| 862 | TCP_ECN_send(sk, skb, tcp_header_size); | 871 | TCP_ECN_send(sk, skb, tcp_header_size); |
| 863 | 872 | ||
| 864 | #ifdef CONFIG_TCP_MD5SIG | 873 | #ifdef CONFIG_TCP_MD5SIG |
| 865 | /* Calculate the MD5 hash, as we have all we need now */ | 874 | /* Calculate the MD5 hash, as we have all we need now */ |
| 866 | if (md5) { | 875 | if (md5) { |
| 867 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 876 | sk_nocaps_add(sk, NETIF_F_GSO_MASK); |
| 868 | tp->af_specific->calc_md5_hash(opts.hash_location, | 877 | tp->af_specific->calc_md5_hash(opts.hash_location, |
| 869 | md5, sk, NULL, skb); | 878 | md5, sk, NULL, skb); |
| 870 | } | 879 | } |
| 871 | #endif | 880 | #endif |
| 872 | 881 | ||
| 873 | icsk->icsk_af_ops->send_check(sk, skb->len, skb); | 882 | icsk->icsk_af_ops->send_check(sk, skb); |
| 874 | 883 | ||
| 875 | if (likely(tcb->flags & TCPCB_FLAG_ACK)) | 884 | if (likely(tcb->flags & TCPHDR_ACK)) |
| 876 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); | 885 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); |
| 877 | 886 | ||
| 878 | if (skb->len != tcp_header_size) | 887 | if (skb->len != tcp_header_size) |
| 879 | tcp_event_data_sent(tp, skb, sk); | 888 | tcp_event_data_sent(tp, skb, sk); |
| 880 | 889 | ||
| 881 | if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) | 890 | if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) |
| 882 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); | 891 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, |
| 892 | tcp_skb_pcount(skb)); | ||
| 883 | 893 | ||
| 884 | err = icsk->icsk_af_ops->queue_xmit(skb, 0); | 894 | err = icsk->icsk_af_ops->queue_xmit(skb); |
| 885 | if (likely(err <= 0)) | 895 | if (likely(err <= 0)) |
| 886 | return err; | 896 | return err; |
| 887 | 897 | ||
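Counting tcp_skb_pcount(skb) rather than a flat 1 keeps the OutSegs MIB accurate under GSO/TSO, where one skb leaving tcp_transmit_skb() may stand for many wire segments. The helper is simply (quoted from memory, as defined in include/net/tcp.h):

    /* Number of real segments this skb accounts for. */
    static inline int tcp_skb_pcount(const struct sk_buff *skb)
    {
            return skb_shinfo(skb)->gso_segs;
    }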
| @@ -1014,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
| 1014 | 1024 | ||
| 1015 | /* PSH and FIN should only be set in the second packet. */ | 1025 | /* PSH and FIN should only be set in the second packet. */ |
| 1016 | flags = TCP_SKB_CB(skb)->flags; | 1026 | flags = TCP_SKB_CB(skb)->flags; |
| 1017 | TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); | 1027 | TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); |
| 1018 | TCP_SKB_CB(buff)->flags = flags; | 1028 | TCP_SKB_CB(buff)->flags = flags; |
| 1019 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; | 1029 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; |
| 1020 | 1030 | ||
| @@ -1180,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk) | |||
| 1180 | icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); | 1190 | icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); |
| 1181 | icsk->icsk_mtup.probe_size = 0; | 1191 | icsk->icsk_mtup.probe_size = 0; |
| 1182 | } | 1192 | } |
| 1193 | EXPORT_SYMBOL(tcp_mtup_init); | ||
| 1183 | 1194 | ||
| 1184 | /* This function synchronize snd mss to current pmtu/exthdr set. | 1195 | /* This function synchronize snd mss to current pmtu/exthdr set. |
| 1185 | 1196 | ||
| @@ -1223,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
| 1223 | 1234 | ||
| 1224 | return mss_now; | 1235 | return mss_now; |
| 1225 | } | 1236 | } |
| 1237 | EXPORT_SYMBOL(tcp_sync_mss); | ||
| 1226 | 1238 | ||
| 1227 | /* Compute the current effective MSS, taking SACKs and IP options, | 1239 | /* Compute the current effective MSS, taking SACKs and IP options, |
| 1228 | * and even PMTU discovery events into account. | 1240 | * and even PMTU discovery events into account. |
| @@ -1319,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, | |||
| 1319 | u32 in_flight, cwnd; | 1331 | u32 in_flight, cwnd; |
| 1320 | 1332 | ||
| 1321 | /* Don't be strict about the congestion window for the final FIN. */ | 1333 | /* Don't be strict about the congestion window for the final FIN. */ |
| 1322 | if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && | 1334 | if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) |
| 1323 | tcp_skb_pcount(skb) == 1) | ||
| 1324 | return 1; | 1335 | return 1; |
| 1325 | 1336 | ||
| 1326 | in_flight = tcp_packets_in_flight(tp); | 1337 | in_flight = tcp_packets_in_flight(tp); |
| @@ -1389,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, | |||
| 1389 | * Nagle can be ignored during F-RTO too (see RFC4138). | 1400 | * Nagle can be ignored during F-RTO too (see RFC4138). |
| 1390 | */ | 1401 | */ |
| 1391 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || | 1402 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || |
| 1392 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) | 1403 | (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) |
| 1393 | return 1; | 1404 | return 1; |
| 1394 | 1405 | ||
| 1395 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) | 1406 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) |
| @@ -1452,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk) | |||
| 1452 | * packet has never been sent out before (and thus is not cloned). | 1463 | * packet has never been sent out before (and thus is not cloned). |
| 1453 | */ | 1464 | */ |
| 1454 | static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | 1465 | static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, |
| 1455 | unsigned int mss_now) | 1466 | unsigned int mss_now, gfp_t gfp) |
| 1456 | { | 1467 | { |
| 1457 | struct sk_buff *buff; | 1468 | struct sk_buff *buff; |
| 1458 | int nlen = skb->len - len; | 1469 | int nlen = skb->len - len; |
| @@ -1462,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
| 1462 | if (skb->len != skb->data_len) | 1473 | if (skb->len != skb->data_len) |
| 1463 | return tcp_fragment(sk, skb, len, mss_now); | 1474 | return tcp_fragment(sk, skb, len, mss_now); |
| 1464 | 1475 | ||
| 1465 | buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); | 1476 | buff = sk_stream_alloc_skb(sk, 0, gfp); |
| 1466 | if (unlikely(buff == NULL)) | 1477 | if (unlikely(buff == NULL)) |
| 1467 | return -ENOMEM; | 1478 | return -ENOMEM; |
| 1468 | 1479 | ||
| @@ -1478,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
| 1478 | 1489 | ||
| 1479 | /* PSH and FIN should only be set in the second packet. */ | 1490 | /* PSH and FIN should only be set in the second packet. */ |
| 1480 | flags = TCP_SKB_CB(skb)->flags; | 1491 | flags = TCP_SKB_CB(skb)->flags; |
| 1481 | TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); | 1492 | TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); |
| 1482 | TCP_SKB_CB(buff)->flags = flags; | 1493 | TCP_SKB_CB(buff)->flags = flags; |
| 1483 | 1494 | ||
| 1484 | /* This packet was never sent out yet, so no SACK bits. */ | 1495 | /* This packet was never sent out yet, so no SACK bits. */ |
| @@ -1509,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
| 1509 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1520 | const struct inet_connection_sock *icsk = inet_csk(sk); |
| 1510 | u32 send_win, cong_win, limit, in_flight; | 1521 | u32 send_win, cong_win, limit, in_flight; |
| 1511 | 1522 | ||
| 1512 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) | 1523 | if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) |
| 1513 | goto send_now; | 1524 | goto send_now; |
| 1514 | 1525 | ||
| 1515 | if (icsk->icsk_ca_state != TCP_CA_Open) | 1526 | if (icsk->icsk_ca_state != TCP_CA_Open) |
| @@ -1635,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
| 1635 | 1646 | ||
| 1636 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; | 1647 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; |
| 1637 | TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; | 1648 | TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; |
| 1638 | TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; | 1649 | TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; |
| 1639 | TCP_SKB_CB(nskb)->sacked = 0; | 1650 | TCP_SKB_CB(nskb)->sacked = 0; |
| 1640 | nskb->csum = 0; | 1651 | nskb->csum = 0; |
| 1641 | nskb->ip_summed = skb->ip_summed; | 1652 | nskb->ip_summed = skb->ip_summed; |
| @@ -1660,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
| 1660 | sk_wmem_free_skb(sk, skb); | 1671 | sk_wmem_free_skb(sk, skb); |
| 1661 | } else { | 1672 | } else { |
| 1662 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & | 1673 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & |
| 1663 | ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); | 1674 | ~(TCPHDR_FIN|TCPHDR_PSH); |
| 1664 | if (!skb_shinfo(skb)->nr_frags) { | 1675 | if (!skb_shinfo(skb)->nr_frags) { |
| 1665 | skb_pull(skb, copy); | 1676 | skb_pull(skb, copy); |
| 1666 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 1677 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
| @@ -1760,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
| 1760 | cwnd_quota); | 1771 | cwnd_quota); |
| 1761 | 1772 | ||
| 1762 | if (skb->len > limit && | 1773 | if (skb->len > limit && |
| 1763 | unlikely(tso_fragment(sk, skb, limit, mss_now))) | 1774 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) |
| 1764 | break; | 1775 | break; |
| 1765 | 1776 | ||
| 1766 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1777 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| @@ -1794,11 +1805,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
| 1794 | void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, | 1805 | void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, |
| 1795 | int nonagle) | 1806 | int nonagle) |
| 1796 | { | 1807 | { |
| 1797 | struct sk_buff *skb = tcp_send_head(sk); | ||
| 1798 | |||
| 1799 | if (!skb) | ||
| 1800 | return; | ||
| 1801 | |||
| 1802 | /* If we are closed, the bytes will have to remain here. | 1808 | /* If we are closed, the bytes will have to remain here. |
| 1803 | * In time closedown will finish, we empty the write queue and | 1809 | * In time closedown will finish, we empty the write queue and |
| 1804 | * all will be happy. | 1810 | * all will be happy. |
| @@ -2016,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
| 2016 | 2022 | ||
| 2017 | if (!sysctl_tcp_retrans_collapse) | 2023 | if (!sysctl_tcp_retrans_collapse) |
| 2018 | return; | 2024 | return; |
| 2019 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) | 2025 | if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) |
| 2020 | return; | 2026 | return; |
| 2021 | 2027 | ||
| 2022 | tcp_for_write_queue_from_safe(skb, tmp, sk) { | 2028 | tcp_for_write_queue_from_safe(skb, tmp, sk) { |
| @@ -2108,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 2108 | * since it is cheap to do so and saves bytes on the network. | 2114 | * since it is cheap to do so and saves bytes on the network. |
| 2109 | */ | 2115 | */ |
| 2110 | if (skb->len > 0 && | 2116 | if (skb->len > 0 && |
| 2111 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && | 2117 | (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && |
| 2112 | tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { | 2118 | tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { |
| 2113 | if (!pskb_trim(skb, 0)) { | 2119 | if (!pskb_trim(skb, 0)) { |
| 2114 | /* Reuse, even though it does some unnecessary work */ | 2120 | /* Reuse, even though it does some unnecessary work */ |
| @@ -2204,6 +2210,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
| 2204 | int mib_idx; | 2210 | int mib_idx; |
| 2205 | int fwd_rexmitting = 0; | 2211 | int fwd_rexmitting = 0; |
| 2206 | 2212 | ||
| 2213 | if (!tp->packets_out) | ||
| 2214 | return; | ||
| 2215 | |||
| 2207 | if (!tp->lost_out) | 2216 | if (!tp->lost_out) |
| 2208 | tp->retransmit_high = tp->snd_una; | 2217 | tp->retransmit_high = tp->snd_una; |
| 2209 | 2218 | ||
| @@ -2297,7 +2306,7 @@ void tcp_send_fin(struct sock *sk) | |||
| 2297 | mss_now = tcp_current_mss(sk); | 2306 | mss_now = tcp_current_mss(sk); |
| 2298 | 2307 | ||
| 2299 | if (tcp_send_head(sk) != NULL) { | 2308 | if (tcp_send_head(sk) != NULL) { |
| 2300 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; | 2309 | TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; |
| 2301 | TCP_SKB_CB(skb)->end_seq++; | 2310 | TCP_SKB_CB(skb)->end_seq++; |
| 2302 | tp->write_seq++; | 2311 | tp->write_seq++; |
| 2303 | } else { | 2312 | } else { |
| @@ -2314,7 +2323,7 @@ void tcp_send_fin(struct sock *sk) | |||
| 2314 | skb_reserve(skb, MAX_TCP_HEADER); | 2323 | skb_reserve(skb, MAX_TCP_HEADER); |
| 2315 | /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ | 2324 | /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ |
| 2316 | tcp_init_nondata_skb(skb, tp->write_seq, | 2325 | tcp_init_nondata_skb(skb, tp->write_seq, |
| 2317 | TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); | 2326 | TCPHDR_ACK | TCPHDR_FIN); |
| 2318 | tcp_queue_skb(sk, skb); | 2327 | tcp_queue_skb(sk, skb); |
| 2319 | } | 2328 | } |
| 2320 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); | 2329 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); |
| @@ -2339,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
| 2339 | /* Reserve space for headers and prepare control bits. */ | 2348 | /* Reserve space for headers and prepare control bits. */ |
| 2340 | skb_reserve(skb, MAX_TCP_HEADER); | 2349 | skb_reserve(skb, MAX_TCP_HEADER); |
| 2341 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), | 2350 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), |
| 2342 | TCPCB_FLAG_ACK | TCPCB_FLAG_RST); | 2351 | TCPHDR_ACK | TCPHDR_RST); |
| 2343 | /* Send it off. */ | 2352 | /* Send it off. */ |
| 2344 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2353 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 2345 | if (tcp_transmit_skb(sk, skb, 0, priority)) | 2354 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
| @@ -2359,11 +2368,11 @@ int tcp_send_synack(struct sock *sk) | |||
| 2359 | struct sk_buff *skb; | 2368 | struct sk_buff *skb; |
| 2360 | 2369 | ||
| 2361 | skb = tcp_write_queue_head(sk); | 2370 | skb = tcp_write_queue_head(sk); |
| 2362 | if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { | 2371 | if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { |
| 2363 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); | 2372 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); |
| 2364 | return -EFAULT; | 2373 | return -EFAULT; |
| 2365 | } | 2374 | } |
| 2366 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { | 2375 | if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { |
| 2367 | if (skb_cloned(skb)) { | 2376 | if (skb_cloned(skb)) { |
| 2368 | struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); | 2377 | struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); |
| 2369 | if (nskb == NULL) | 2378 | if (nskb == NULL) |
| @@ -2377,7 +2386,7 @@ int tcp_send_synack(struct sock *sk) | |||
| 2377 | skb = nskb; | 2386 | skb = nskb; |
| 2378 | } | 2387 | } |
| 2379 | 2388 | ||
| 2380 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; | 2389 | TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; |
| 2381 | TCP_ECN_send_synack(tcp_sk(sk), skb); | 2390 | TCP_ECN_send_synack(tcp_sk(sk), skb); |
| 2382 | } | 2391 | } |
| 2383 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2392 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| @@ -2393,13 +2402,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2393 | struct tcp_extend_values *xvp = tcp_xv(rvp); | 2402 | struct tcp_extend_values *xvp = tcp_xv(rvp); |
| 2394 | struct inet_request_sock *ireq = inet_rsk(req); | 2403 | struct inet_request_sock *ireq = inet_rsk(req); |
| 2395 | struct tcp_sock *tp = tcp_sk(sk); | 2404 | struct tcp_sock *tp = tcp_sk(sk); |
| 2405 | const struct tcp_cookie_values *cvp = tp->cookie_values; | ||
| 2396 | struct tcphdr *th; | 2406 | struct tcphdr *th; |
| 2397 | struct sk_buff *skb; | 2407 | struct sk_buff *skb; |
| 2398 | struct tcp_md5sig_key *md5; | 2408 | struct tcp_md5sig_key *md5; |
| 2399 | int tcp_header_size; | 2409 | int tcp_header_size; |
| 2400 | int mss; | 2410 | int mss; |
| 2411 | int s_data_desired = 0; | ||
| 2401 | 2412 | ||
| 2402 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); | 2413 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) |
| 2414 | s_data_desired = cvp->s_data_desired; | ||
| 2415 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); | ||
| 2403 | if (skb == NULL) | 2416 | if (skb == NULL) |
| 2404 | return NULL; | 2417 | return NULL; |
| 2405 | 2418 | ||
| @@ -2422,7 +2435,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2422 | &req->rcv_wnd, | 2435 | &req->rcv_wnd, |
| 2423 | &req->window_clamp, | 2436 | &req->window_clamp, |
| 2424 | ireq->wscale_ok, | 2437 | ireq->wscale_ok, |
| 2425 | &rcv_wscale); | 2438 | &rcv_wscale, |
| 2439 | dst_metric(dst, RTAX_INITRWND)); | ||
| 2426 | ireq->rcv_wscale = rcv_wscale; | 2440 | ireq->rcv_wscale = rcv_wscale; |
| 2427 | } | 2441 | } |
| 2428 | 2442 | ||
| @@ -2451,19 +2465,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2451 | * not even correctly set) | 2465 | * not even correctly set) |
| 2452 | */ | 2466 | */ |
| 2453 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, | 2467 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, |
| 2454 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); | 2468 | TCPHDR_SYN | TCPHDR_ACK); |
| 2455 | 2469 | ||
| 2456 | if (OPTION_COOKIE_EXTENSION & opts.options) { | 2470 | if (OPTION_COOKIE_EXTENSION & opts.options) { |
| 2457 | const struct tcp_cookie_values *cvp = tp->cookie_values; | 2471 | if (s_data_desired) { |
| 2458 | 2472 | u8 *buf = skb_put(skb, s_data_desired); | |
| 2459 | if (cvp != NULL && | ||
| 2460 | cvp->s_data_constant && | ||
| 2461 | cvp->s_data_desired > 0) { | ||
| 2462 | u8 *buf = skb_put(skb, cvp->s_data_desired); | ||
| 2463 | 2473 | ||
| 2464 | /* copy data directly from the listening socket. */ | 2474 | /* copy data directly from the listening socket. */ |
| 2465 | memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); | 2475 | memcpy(buf, cvp->s_data_payload, s_data_desired); |
| 2466 | TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; | 2476 | TCP_SKB_CB(skb)->end_seq += s_data_desired; |
| 2467 | } | 2477 | } |
| 2468 | 2478 | ||
| 2469 | if (opts.hash_size > 0) { | 2479 | if (opts.hash_size > 0) { |
| @@ -2480,7 +2490,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2480 | *tail-- ^= TCP_SKB_CB(skb)->seq + 1; | 2490 | *tail-- ^= TCP_SKB_CB(skb)->seq + 1; |
| 2481 | 2491 | ||
| 2482 | /* recommended */ | 2492 | /* recommended */ |
| 2483 | *tail-- ^= ((th->dest << 16) | th->source); | 2493 | *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); |
| 2484 | *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ | 2494 | *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ |
| 2485 | 2495 | ||
| 2486 | sha_transform((__u32 *)&xvp->cookie_bakery[0], | 2496 | sha_transform((__u32 *)&xvp->cookie_bakery[0], |
| @@ -2498,7 +2508,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2498 | th->window = htons(min(req->rcv_wnd, 65535U)); | 2508 | th->window = htons(min(req->rcv_wnd, 65535U)); |
| 2499 | tcp_options_write((__be32 *)(th + 1), tp, &opts); | 2509 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
| 2500 | th->doff = (tcp_header_size >> 2); | 2510 | th->doff = (tcp_header_size >> 2); |
| 2501 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); | 2511 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); |
| 2502 | 2512 | ||
| 2503 | #ifdef CONFIG_TCP_MD5SIG | 2513 | #ifdef CONFIG_TCP_MD5SIG |
| 2504 | /* Okay, we have all we need - do the md5 hash if needed */ | 2514 | /* Okay, we have all we need - do the md5 hash if needed */ |
| @@ -2510,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2510 | 2520 | ||
| 2511 | return skb; | 2521 | return skb; |
| 2512 | } | 2522 | } |
| 2523 | EXPORT_SYMBOL(tcp_make_synack); | ||
| 2513 | 2524 | ||
| 2514 | /* Do all connect socket setups that can be done AF independent. */ | 2525 | /* Do all connect socket setups that can be done AF independent. */ |
| 2515 | static void tcp_connect_init(struct sock *sk) | 2526 | static void tcp_connect_init(struct sock *sk) |
| @@ -2549,7 +2560,8 @@ static void tcp_connect_init(struct sock *sk) | |||
| 2549 | &tp->rcv_wnd, | 2560 | &tp->rcv_wnd, |
| 2550 | &tp->window_clamp, | 2561 | &tp->window_clamp, |
| 2551 | sysctl_tcp_window_scaling, | 2562 | sysctl_tcp_window_scaling, |
| 2552 | &rcv_wscale); | 2563 | &rcv_wscale, |
| 2564 | dst_metric(dst, RTAX_INITRWND)); | ||
| 2553 | 2565 | ||
| 2554 | tp->rx_opt.rcv_wscale = rcv_wscale; | 2566 | tp->rx_opt.rcv_wscale = rcv_wscale; |
| 2555 | tp->rcv_ssthresh = tp->rcv_wnd; | 2567 | tp->rcv_ssthresh = tp->rcv_wnd; |
| @@ -2586,7 +2598,7 @@ int tcp_connect(struct sock *sk) | |||
| 2586 | skb_reserve(buff, MAX_TCP_HEADER); | 2598 | skb_reserve(buff, MAX_TCP_HEADER); |
| 2587 | 2599 | ||
| 2588 | tp->snd_nxt = tp->write_seq; | 2600 | tp->snd_nxt = tp->write_seq; |
| 2589 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); | 2601 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
| 2590 | TCP_ECN_send_syn(sk, buff); | 2602 | TCP_ECN_send_syn(sk, buff); |
| 2591 | 2603 | ||
| 2592 | /* Send it off. */ | 2604 | /* Send it off. */ |
| @@ -2611,6 +2623,7 @@ int tcp_connect(struct sock *sk) | |||
| 2611 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | 2623 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
| 2612 | return 0; | 2624 | return 0; |
| 2613 | } | 2625 | } |
| 2626 | EXPORT_SYMBOL(tcp_connect); | ||
| 2614 | 2627 | ||
| 2615 | /* Send out a delayed ack, the caller does the policy checking | 2628 | /* Send out a delayed ack, the caller does the policy checking |
| 2616 | * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() | 2629 | * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() |
| @@ -2692,7 +2705,7 @@ void tcp_send_ack(struct sock *sk) | |||
| 2692 | 2705 | ||
| 2693 | /* Reserve space for headers and prepare control bits. */ | 2706 | /* Reserve space for headers and prepare control bits. */ |
| 2694 | skb_reserve(buff, MAX_TCP_HEADER); | 2707 | skb_reserve(buff, MAX_TCP_HEADER); |
| 2695 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); | 2708 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); |
| 2696 | 2709 | ||
| 2697 | /* Send it off, this clears delayed acks for us. */ | 2710 | /* Send it off, this clears delayed acks for us. */ |
| 2698 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 2711 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
| @@ -2726,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
| 2726 | * end to send an ack. Don't queue or clone SKB, just | 2739 | * end to send an ack. Don't queue or clone SKB, just |
| 2727 | * send it. | 2740 | * send it. |
| 2728 | */ | 2741 | */ |
| 2729 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); | 2742 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); |
| 2730 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2743 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 2731 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2744 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
| 2732 | } | 2745 | } |
| @@ -2756,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk) | |||
| 2756 | if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || | 2769 | if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || |
| 2757 | skb->len > mss) { | 2770 | skb->len > mss) { |
| 2758 | seg_size = min(seg_size, mss); | 2771 | seg_size = min(seg_size, mss); |
| 2759 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2772 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
| 2760 | if (tcp_fragment(sk, skb, seg_size, mss)) | 2773 | if (tcp_fragment(sk, skb, seg_size, mss)) |
| 2761 | return -1; | 2774 | return -1; |
| 2762 | } else if (!tcp_skb_pcount(skb)) | 2775 | } else if (!tcp_skb_pcount(skb)) |
| 2763 | tcp_set_skb_tso_segs(sk, skb, mss); | 2776 | tcp_set_skb_tso_segs(sk, skb, mss); |
| 2764 | 2777 | ||
| 2765 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2778 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
| 2766 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2779 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 2767 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2780 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
| 2768 | if (!err) | 2781 | if (!err) |
| @@ -2815,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk) | |||
| 2815 | TCP_RTO_MAX); | 2828 | TCP_RTO_MAX); |
| 2816 | } | 2829 | } |
| 2817 | } | 2830 | } |
| 2818 | |||
| 2819 | EXPORT_SYMBOL(tcp_select_initial_window); | ||
| 2820 | EXPORT_SYMBOL(tcp_connect); | ||
| 2821 | EXPORT_SYMBOL(tcp_make_synack); | ||
| 2822 | EXPORT_SYMBOL(tcp_simple_retransmit); | ||
| 2823 | EXPORT_SYMBOL(tcp_sync_mss); | ||
| 2824 | EXPORT_SYMBOL(tcp_mtup_init); | ||
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 9bc805df95d2..f8efada580e8 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/kprobes.h> | 22 | #include <linux/kprobes.h> |
| 23 | #include <linux/socket.h> | 23 | #include <linux/socket.h> |
| 24 | #include <linux/tcp.h> | 24 | #include <linux/tcp.h> |
| 25 | #include <linux/slab.h> | ||
| 25 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
| 26 | #include <linux/module.h> | 27 | #include <linux/module.h> |
| 27 | #include <linux/ktime.h> | 28 | #include <linux/ktime.h> |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8816a20c2597..74c54b30600f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
| @@ -19,6 +19,7 @@ | |||
| 19 | */ | 19 | */ |
| 20 | 20 | ||
| 21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
| 22 | #include <linux/gfp.h> | ||
| 22 | #include <net/tcp.h> | 23 | #include <net/tcp.h> |
| 23 | 24 | ||
| 24 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; | 25 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; |
| @@ -29,6 +30,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; | |||
| 29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; | 30 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
| 30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | 31 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
| 31 | int sysctl_tcp_orphan_retries __read_mostly; | 32 | int sysctl_tcp_orphan_retries __read_mostly; |
| 33 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | ||
| 32 | 34 | ||
| 33 | static void tcp_write_timer(unsigned long); | 35 | static void tcp_write_timer(unsigned long); |
| 34 | static void tcp_delack_timer(unsigned long); | 36 | static void tcp_delack_timer(unsigned long); |
| @@ -39,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk) | |||
| 39 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, | 41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, |
| 40 | &tcp_keepalive_timer); | 42 | &tcp_keepalive_timer); |
| 41 | } | 43 | } |
| 42 | |||
| 43 | EXPORT_SYMBOL(tcp_init_xmit_timers); | 44 | EXPORT_SYMBOL(tcp_init_xmit_timers); |
| 44 | 45 | ||
| 45 | static void tcp_write_err(struct sock *sk) | 46 | static void tcp_write_err(struct sock *sk) |
| @@ -65,18 +66,18 @@ static void tcp_write_err(struct sock *sk) | |||
| 65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) | 66 | static int tcp_out_of_resources(struct sock *sk, int do_reset) |
| 66 | { | 67 | { |
| 67 | struct tcp_sock *tp = tcp_sk(sk); | 68 | struct tcp_sock *tp = tcp_sk(sk); |
| 68 | int orphans = percpu_counter_read_positive(&tcp_orphan_count); | 69 | int shift = 0; |
| 69 | 70 | ||
| 70 | /* If peer does not open window for long time, or did not transmit | 71 | /* If peer does not open window for long time, or did not transmit |
| 71 | * anything for long time, penalize it. */ | 72 | * anything for long time, penalize it. */ |
| 72 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) | 73 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) |
| 73 | orphans <<= 1; | 74 | shift++; |
| 74 | 75 | ||
| 75 | /* If some dubious ICMP arrived, penalize even more. */ | 76 | /* If some dubious ICMP arrived, penalize even more. */ |
| 76 | if (sk->sk_err_soft) | 77 | if (sk->sk_err_soft) |
| 77 | orphans <<= 1; | 78 | shift++; |
| 78 | 79 | ||
| 79 | if (tcp_too_many_orphans(sk, orphans)) { | 80 | if (tcp_too_many_orphans(sk, shift)) { |
| 80 | if (net_ratelimit()) | 81 | if (net_ratelimit()) |
| 81 | printk(KERN_INFO "Out of socket memory\n"); | 82 | printk(KERN_INFO "Out of socket memory\n"); |
| 82 | 83 | ||
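Passing a shift instead of a pre-doubled counter snapshot moves the percpu read into tcp_too_many_orphans(), which can now take a cheap approximate reading first and recheck exactly only near the limit. A sketch of the helper shape this implies (an assumption, not part of this hunk):

    static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
    {
            struct percpu_counter *ocp = sk->sk_prot->orphan_count;
            int orphans = percpu_counter_read_positive(ocp);

            if (orphans << shift > sysctl_tcp_max_orphans) {
                    /* approximate reading exceeded the limit: recheck exactly */
                    orphans = percpu_counter_sum_positive(ocp);
                    if (orphans << shift > sysctl_tcp_max_orphans)
                            return true;
            }
            return false; /* memory-pressure checks elided in this sketch */
    }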
| @@ -133,14 +134,17 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) | |||
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | /* This function calculates a "timeout" which is equivalent to the timeout of a | 136 | /* This function calculates a "timeout" which is equivalent to the timeout of a |
| 136 | * TCP connection after "boundary" unsucessful, exponentially backed-off | 137 | * TCP connection after "boundary" unsuccessful, exponentially backed-off |
| 137 | * retransmissions with an initial RTO of TCP_RTO_MIN. | 138 | * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if |
| 139 | * syn_set flag is set. | ||
| 138 | */ | 140 | */ |
| 139 | static bool retransmits_timed_out(struct sock *sk, | 141 | static bool retransmits_timed_out(struct sock *sk, |
| 140 | unsigned int boundary) | 142 | unsigned int boundary, |
| 143 | bool syn_set) | ||
| 141 | { | 144 | { |
| 142 | unsigned int timeout, linear_backoff_thresh; | 145 | unsigned int timeout, linear_backoff_thresh; |
| 143 | unsigned int start_ts; | 146 | unsigned int start_ts; |
| 147 | unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; | ||
| 144 | 148 | ||
| 145 | if (!inet_csk(sk)->icsk_retransmits) | 149 | if (!inet_csk(sk)->icsk_retransmits) |
| 146 | return false; | 150 | return false; |
| @@ -150,12 +154,12 @@ static bool retransmits_timed_out(struct sock *sk, | |||
| 150 | else | 154 | else |
| 151 | start_ts = tcp_sk(sk)->retrans_stamp; | 155 | start_ts = tcp_sk(sk)->retrans_stamp; |
| 152 | 156 | ||
| 153 | linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); | 157 | linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); |
| 154 | 158 | ||
| 155 | if (boundary <= linear_backoff_thresh) | 159 | if (boundary <= linear_backoff_thresh) |
| 156 | timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; | 160 | timeout = ((2 << boundary) - 1) * rto_base; |
| 157 | else | 161 | else |
| 158 | timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + | 162 | timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + |
| 159 | (boundary - linear_backoff_thresh) * TCP_RTO_MAX; | 163 | (boundary - linear_backoff_thresh) * TCP_RTO_MAX; |
| 160 | 164 | ||
| 161 | return (tcp_time_stamp - start_ts) >= timeout; | 165 | return (tcp_time_stamp - start_ts) >= timeout; |
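The boundary-to-timeout mapping is easiest to see with concrete numbers; a worked example as a comment (assuming TCP_RTO_MIN = 200 ms and TCP_RTO_MAX = 120 s, the usual HZ-independent values):

    /* linear_backoff_thresh = ilog2(120000 / 200) = ilog2(600) = 9.
     * For boundary = 15 (the default tcp_retries2):
     *   timeout = ((2 << 9) - 1) * 200 ms  = 204.6 s of doubling RTOs
     *           + (15 - 9) * 120 s         = 720   s at the RTO ceiling
     *           = 924.6 s, the familiar ~15.5-minute write timeout.
     * With syn_set, rto_base becomes TCP_TIMEOUT_INIT (3 s here), so SYN
     * retransmissions hit their boundary much sooner. */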
| @@ -166,18 +170,19 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 166 | { | 170 | { |
| 167 | struct inet_connection_sock *icsk = inet_csk(sk); | 171 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 168 | int retry_until; | 172 | int retry_until; |
| 169 | bool do_reset; | 173 | bool do_reset, syn_set = 0; |
| 170 | 174 | ||
| 171 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 175 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
| 172 | if (icsk->icsk_retransmits) | 176 | if (icsk->icsk_retransmits) |
| 173 | dst_negative_advice(&sk->sk_dst_cache, sk); | 177 | dst_negative_advice(sk); |
| 174 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 178 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
| 179 | syn_set = 1; | ||
| 175 | } else { | 180 | } else { |
| 176 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { | 181 | if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { |
| 177 | /* Black hole detection */ | 182 | /* Black hole detection */ |
| 178 | tcp_mtu_probing(icsk, sk); | 183 | tcp_mtu_probing(icsk, sk); |
| 179 | 184 | ||
| 180 | dst_negative_advice(&sk->sk_dst_cache, sk); | 185 | dst_negative_advice(sk); |
| 181 | } | 186 | } |
| 182 | 187 | ||
| 183 | retry_until = sysctl_tcp_retries2; | 188 | retry_until = sysctl_tcp_retries2; |
| @@ -186,14 +191,14 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 186 | 191 | ||
| 187 | retry_until = tcp_orphan_retries(sk, alive); | 192 | retry_until = tcp_orphan_retries(sk, alive); |
| 188 | do_reset = alive || | 193 | do_reset = alive || |
| 189 | !retransmits_timed_out(sk, retry_until); | 194 | !retransmits_timed_out(sk, retry_until, 0); |
| 190 | 195 | ||
| 191 | if (tcp_out_of_resources(sk, do_reset)) | 196 | if (tcp_out_of_resources(sk, do_reset)) |
| 192 | return 1; | 197 | return 1; |
| 193 | } | 198 | } |
| 194 | } | 199 | } |
| 195 | 200 | ||
| 196 | if (retransmits_timed_out(sk, retry_until)) { | 201 | if (retransmits_timed_out(sk, retry_until, syn_set)) { |
| 197 | /* Has it gone just too far? */ | 202 | /* Has it gone just too far? */ |
| 198 | tcp_write_err(sk); | 203 | tcp_write_err(sk); |
| 199 | return 1; | 204 | return 1; |
| @@ -415,9 +420,27 @@ void tcp_retransmit_timer(struct sock *sk) | |||
| 415 | icsk->icsk_retransmits++; | 420 | icsk->icsk_retransmits++; |
| 416 | 421 | ||
| 417 | out_reset_timer: | 422 | out_reset_timer: |
| 418 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 423 | /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is |
| 424 | * used to reset timer, set to 0. Recalculate 'icsk_rto' as this | ||
| 425 | * might be increased if the stream oscillates between thin and thick, | ||
| 426 | * thus the old value might already be too high compared to the value | ||
| 427 | * set by 'tcp_set_rto' in tcp_input.c which resets the rto without | ||
| 428 | * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating | ||
| 429 | * exponential backoff behaviour to avoid continue hammering | ||
| 430 | * linear-timeout retransmissions into a black hole | ||
| 431 | */ | ||
| 432 | if (sk->sk_state == TCP_ESTABLISHED && | ||
| 433 | (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && | ||
| 434 | tcp_stream_is_thin(tp) && | ||
| 435 | icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { | ||
| 436 | icsk->icsk_backoff = 0; | ||
| 437 | icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); | ||
| 438 | } else { | ||
| 439 | /* Use normal (exponential) backoff */ | ||
| 440 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | ||
| 441 | } | ||
| 419 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 442 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
| 420 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | 443 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) |
| 421 | __sk_dst_reset(sk); | 444 | __sk_dst_reset(sk); |
| 422 | 445 | ||
| 423 | out:; | 446 | out:; |
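Linear timeouts are gated on tcp_stream_is_thin() (fewer than four packets in flight, outside initial slow start) and on either knob being set. A minimal fragment showing the per-socket switch, assuming the TCP_THIN_LINEAR_TIMEOUTS sockopt that accompanies this series (fd is any connected TCP socket):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>

    int on = 1;  /* sets tp->thin_lto for this socket only */
    setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS, &on, sizeof(on));

The global equivalent is the sysctl_tcp_thin_linear_timeouts toggle declared earlier in this file, i.e. echo 1 > /proc/sys/net/ipv4/tcp_thin_linear_timeouts.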
| @@ -474,6 +497,12 @@ static void tcp_synack_timer(struct sock *sk) | |||
| 474 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); | 497 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); |
| 475 | } | 498 | } |
| 476 | 499 | ||
| 500 | void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) | ||
| 501 | { | ||
| 502 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); | ||
| 503 | } | ||
| 504 | EXPORT_SYMBOL(tcp_syn_ack_timeout); | ||
| 505 | |||
| 477 | void tcp_set_keepalive(struct sock *sk, int val) | 506 | void tcp_set_keepalive(struct sock *sk, int val) |
| 478 | { | 507 | { |
| 479 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) | 508 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) |
| @@ -491,7 +520,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
| 491 | struct sock *sk = (struct sock *) data; | 520 | struct sock *sk = (struct sock *) data; |
| 492 | struct inet_connection_sock *icsk = inet_csk(sk); | 521 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 493 | struct tcp_sock *tp = tcp_sk(sk); | 522 | struct tcp_sock *tp = tcp_sk(sk); |
| 494 | __u32 elapsed; | 523 | u32 elapsed; |
| 495 | 524 | ||
| 496 | /* Only process if socket is not in use. */ | 525 | /* Only process if socket is not in use. */ |
| 497 | bh_lock_sock(sk); | 526 | bh_lock_sock(sk); |
| @@ -528,7 +557,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
| 528 | if (tp->packets_out || tcp_send_head(sk)) | 557 | if (tp->packets_out || tcp_send_head(sk)) |
| 529 | goto resched; | 558 | goto resched; |
| 530 | 559 | ||
| 531 | elapsed = tcp_time_stamp - tp->rcv_tstamp; | 560 | elapsed = keepalive_time_elapsed(tp); |
| 532 | 561 | ||
| 533 | if (elapsed >= keepalive_time_when(tp)) { | 562 | if (elapsed >= keepalive_time_when(tp)) { |
| 534 | if (icsk->icsk_probes_out >= keepalive_probes(tp)) { | 563 | if (icsk->icsk_probes_out >= keepalive_probes(tp)) { |
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 3959e0ca456a..59186ca7808a 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <linux/mutex.h> | 8 | #include <linux/mutex.h> |
| 9 | #include <linux/netdevice.h> | 9 | #include <linux/netdevice.h> |
| 10 | #include <linux/skbuff.h> | 10 | #include <linux/skbuff.h> |
| 11 | #include <linux/slab.h> | ||
| 11 | #include <net/icmp.h> | 12 | #include <net/icmp.h> |
| 12 | #include <net/ip.h> | 13 | #include <net/ip.h> |
| 13 | #include <net/protocol.h> | 14 | #include <net/protocol.h> |
| @@ -47,7 +48,6 @@ err: | |||
| 47 | 48 | ||
| 48 | return ret; | 49 | return ret; |
| 49 | } | 50 | } |
| 50 | |||
| 51 | EXPORT_SYMBOL(xfrm4_tunnel_register); | 51 | EXPORT_SYMBOL(xfrm4_tunnel_register); |
| 52 | 52 | ||
| 53 | int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) | 53 | int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) |
| @@ -71,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) | |||
| 71 | 71 | ||
| 72 | return ret; | 72 | return ret; |
| 73 | } | 73 | } |
| 74 | |||
| 75 | EXPORT_SYMBOL(xfrm4_tunnel_deregister); | 74 | EXPORT_SYMBOL(xfrm4_tunnel_deregister); |
| 76 | 75 | ||
| 77 | static int tunnel4_rcv(struct sk_buff *skb) | 76 | static int tunnel4_rcv(struct sk_buff *skb) |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f0126fdd7e04..fb23c2e63b52 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
| @@ -95,6 +95,7 @@ | |||
| 95 | #include <linux/mm.h> | 95 | #include <linux/mm.h> |
| 96 | #include <linux/inet.h> | 96 | #include <linux/inet.h> |
| 97 | #include <linux/netdevice.h> | 97 | #include <linux/netdevice.h> |
| 98 | #include <linux/slab.h> | ||
| 98 | #include <net/tcp_states.h> | 99 | #include <net/tcp_states.h> |
| 99 | #include <linux/skbuff.h> | 100 | #include <linux/skbuff.h> |
| 100 | #include <linux/proc_fs.h> | 101 | #include <linux/proc_fs.h> |
| @@ -232,7 +233,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
| 232 | */ | 233 | */ |
| 233 | do { | 234 | do { |
| 234 | if (low <= snum && snum <= high && | 235 | if (low <= snum && snum <= high && |
| 235 | !test_bit(snum >> udptable->log, bitmap)) | 236 | !test_bit(snum >> udptable->log, bitmap) && |
| 237 | !inet_is_reserved_local_port(snum)) | ||
| 236 | goto found; | 238 | goto found; |
| 237 | snum += rand; | 239 | snum += rand; |
| 238 | } while (snum != first); | 240 | } while (snum != first); |
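The extra inet_is_reserved_local_port() test keeps autobinding away from ports an administrator has set aside for fixed services; it consults a bitmap fed from userspace (assumed sysctl name: net.ipv4.ip_local_reserved_ports), for example:

    echo 8080,9000-9010 > /proc/sys/net/ipv4/ip_local_reserved_ports

Explicit binds to a reserved port still succeed; only the random search above skips them.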
| @@ -306,13 +308,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | |||
| 306 | static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, | 308 | static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, |
| 307 | unsigned int port) | 309 | unsigned int port) |
| 308 | { | 310 | { |
| 309 | return jhash_1word(saddr, net_hash_mix(net)) ^ port; | 311 | return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; |
| 310 | } | 312 | } |
| 311 | 313 | ||
| 312 | int udp_v4_get_port(struct sock *sk, unsigned short snum) | 314 | int udp_v4_get_port(struct sock *sk, unsigned short snum) |
| 313 | { | 315 | { |
| 314 | unsigned int hash2_nulladdr = | 316 | unsigned int hash2_nulladdr = |
| 315 | udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); | 317 | udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); |
| 316 | unsigned int hash2_partial = | 318 | unsigned int hash2_partial = |
| 317 | udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); | 319 | udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); |
| 318 | 320 | ||
| @@ -465,14 +467,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
| 465 | daddr, hnum, dif, | 467 | daddr, hnum, dif, |
| 466 | hslot2, slot2); | 468 | hslot2, slot2); |
| 467 | if (!result) { | 469 | if (!result) { |
| 468 | hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); | 470 | hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); |
| 469 | slot2 = hash2 & udptable->mask; | 471 | slot2 = hash2 & udptable->mask; |
| 470 | hslot2 = &udptable->hash2[slot2]; | 472 | hslot2 = &udptable->hash2[slot2]; |
| 471 | if (hslot->count < hslot2->count) | 473 | if (hslot->count < hslot2->count) |
| 472 | goto begin; | 474 | goto begin; |
| 473 | 475 | ||
| 474 | result = udp4_lib_lookup2(net, INADDR_ANY, sport, | 476 | result = udp4_lib_lookup2(net, saddr, sport, |
| 475 | daddr, hnum, dif, | 477 | htonl(INADDR_ANY), hnum, dif, |
| 476 | hslot2, slot2); | 478 | hslot2, slot2); |
| 477 | } | 479 | } |
| 478 | rcu_read_unlock(); | 480 | rcu_read_unlock(); |
| @@ -631,9 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
| 631 | if (!inet->recverr) { | 633 | if (!inet->recverr) { |
| 632 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | 634 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
| 633 | goto out; | 635 | goto out; |
| 634 | } else { | 636 | } else |
| 635 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); | 637 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); |
| 636 | } | 638 | |
| 637 | sk->sk_err = err; | 639 | sk->sk_err = err; |
| 638 | sk->sk_error_report(sk); | 640 | sk->sk_error_report(sk); |
| 639 | out: | 641 | out: |
| @@ -912,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 912 | !sock_flag(sk, SOCK_BROADCAST)) | 914 | !sock_flag(sk, SOCK_BROADCAST)) |
| 913 | goto out; | 915 | goto out; |
| 914 | if (connected) | 916 | if (connected) |
| 915 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | 917 | sk_dst_set(sk, dst_clone(&rt->dst)); |
| 916 | } | 918 | } |
| 917 | 919 | ||
| 918 | if (msg->msg_flags&MSG_CONFIRM) | 920 | if (msg->msg_flags&MSG_CONFIRM) |
| @@ -976,7 +978,7 @@ out: | |||
| 976 | return err; | 978 | return err; |
| 977 | 979 | ||
| 978 | do_confirm: | 980 | do_confirm: |
| 979 | dst_confirm(&rt->u.dst); | 981 | dst_confirm(&rt->dst); |
| 980 | if (!(msg->msg_flags&MSG_PROBE) || len) | 982 | if (!(msg->msg_flags&MSG_PROBE) || len) |
| 981 | goto back_from_confirm; | 983 | goto back_from_confirm; |
| 982 | err = 0; | 984 | err = 0; |
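
The rt->u.dst to rt->dst renames that recur through this series follow from dropping the single-member union that used to wrap the dst_entry embedded at the head of struct rtable; presumably the layout is now simply:

    struct rtable {
            struct dst_entry dst;   /* was: union { struct dst_entry dst; } u; */
            /* ... IPv4 routing fields ... */
    };

Because dst stays the first member, the usual casts between struct rtable * and struct dst_entry * remain valid; only the spelling of the accessor changes.
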
| @@ -1061,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk) | |||
| 1061 | spin_unlock_bh(&rcvq->lock); | 1063 | spin_unlock_bh(&rcvq->lock); |
| 1062 | 1064 | ||
| 1063 | if (!skb_queue_empty(&list_kill)) { | 1065 | if (!skb_queue_empty(&list_kill)) { |
| 1064 | lock_sock(sk); | 1066 | bool slow = lock_sock_fast(sk); |
| 1067 | |||
| 1065 | __skb_queue_purge(&list_kill); | 1068 | __skb_queue_purge(&list_kill); |
| 1066 | sk_mem_reclaim_partial(sk); | 1069 | sk_mem_reclaim_partial(sk); |
| 1067 | release_sock(sk); | 1070 | unlock_sock_fast(sk, slow); |
| 1068 | } | 1071 | } |
| 1069 | return res; | 1072 | return res; |
| 1070 | } | 1073 | } |
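
lock_sock_fast() is a cheaper lock_sock() for short critical sections: when no user context owns the socket it simply keeps holding the bottom-half spinlock and returns false; only if it had to wait for an owner does it take the full slow path and return true, which unlock_sock_fast() then unwinds appropriately. The usage pattern is:

    bool slow = lock_sock_fast(sk);  /* false: still under the BH spinlock */
    __skb_queue_purge(&list_kill);   /* keep this section short */
    sk_mem_reclaim_partial(sk);
    unlock_sock_fast(sk, slow);      /* releases whichever mode was taken */
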
| @@ -1117,10 +1120,11 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 1117 | struct inet_sock *inet = inet_sk(sk); | 1120 | struct inet_sock *inet = inet_sk(sk); |
| 1118 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | 1121 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; |
| 1119 | struct sk_buff *skb; | 1122 | struct sk_buff *skb; |
| 1120 | unsigned int ulen, copied; | 1123 | unsigned int ulen; |
| 1121 | int peeked; | 1124 | int peeked; |
| 1122 | int err; | 1125 | int err; |
| 1123 | int is_udplite = IS_UDPLITE(sk); | 1126 | int is_udplite = IS_UDPLITE(sk); |
| 1127 | bool slow; | ||
| 1124 | 1128 | ||
| 1125 | /* | 1129 | /* |
| 1126 | * Check any passed addresses | 1130 | * Check any passed addresses |
| @@ -1138,10 +1142,9 @@ try_again: | |||
| 1138 | goto out; | 1142 | goto out; |
| 1139 | 1143 | ||
| 1140 | ulen = skb->len - sizeof(struct udphdr); | 1144 | ulen = skb->len - sizeof(struct udphdr); |
| 1141 | copied = len; | 1145 | if (len > ulen) |
| 1142 | if (copied > ulen) | 1146 | len = ulen; |
| 1143 | copied = ulen; | 1147 | else if (len < ulen) |
| 1144 | else if (copied < ulen) | ||
| 1145 | msg->msg_flags |= MSG_TRUNC; | 1148 | msg->msg_flags |= MSG_TRUNC; |
| 1146 | 1149 | ||
| 1147 | /* | 1150 | /* |
| @@ -1150,14 +1153,14 @@ try_again: | |||
| 1150 | * coverage checksum (UDP-Lite), do it before the copy. | 1153 | * coverage checksum (UDP-Lite), do it before the copy. |
| 1151 | */ | 1154 | */ |
| 1152 | 1155 | ||
| 1153 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | 1156 | if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { |
| 1154 | if (udp_lib_checksum_complete(skb)) | 1157 | if (udp_lib_checksum_complete(skb)) |
| 1155 | goto csum_copy_err; | 1158 | goto csum_copy_err; |
| 1156 | } | 1159 | } |
| 1157 | 1160 | ||
| 1158 | if (skb_csum_unnecessary(skb)) | 1161 | if (skb_csum_unnecessary(skb)) |
| 1159 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 1162 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
| 1160 | msg->msg_iov, copied); | 1163 | msg->msg_iov, len); |
| 1161 | else { | 1164 | else { |
| 1162 | err = skb_copy_and_csum_datagram_iovec(skb, | 1165 | err = skb_copy_and_csum_datagram_iovec(skb, |
| 1163 | sizeof(struct udphdr), | 1166 | sizeof(struct udphdr), |
| @@ -1186,7 +1189,7 @@ try_again: | |||
| 1186 | if (inet->cmsg_flags) | 1189 | if (inet->cmsg_flags) |
| 1187 | ip_cmsg_recv(msg, skb); | 1190 | ip_cmsg_recv(msg, skb); |
| 1188 | 1191 | ||
| 1189 | err = copied; | 1192 | err = len; |
| 1190 | if (flags & MSG_TRUNC) | 1193 | if (flags & MSG_TRUNC) |
| 1191 | err = ulen; | 1194 | err = ulen; |
| 1192 | 1195 | ||
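
Folding the separate copied variable into len does not change what userspace sees: the return value is still the number of bytes copied, MSG_TRUNC is set in msg_flags when the datagram exceeded the buffer, and passing MSG_TRUNC in the input flags makes the call return the full datagram length (ulen). A userspace sketch, assuming fd is an open UDP socket:

    #include <stdio.h>
    #include <sys/socket.h>

    char buf[512];
    struct iovec iov  = { .iov_base = buf, .iov_len = sizeof(buf) };
    struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };

    ssize_t n = recvmsg(fd, &msg, 0);
    if (n >= 0 && (msg.msg_flags & MSG_TRUNC))
            fprintf(stderr, "datagram truncated to %zd bytes\n", n);
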
| @@ -1196,10 +1199,10 @@ out: | |||
| 1196 | return err; | 1199 | return err; |
| 1197 | 1200 | ||
| 1198 | csum_copy_err: | 1201 | csum_copy_err: |
| 1199 | lock_sock(sk); | 1202 | slow = lock_sock_fast(sk); |
| 1200 | if (!skb_kill_datagram(sk, skb, flags)) | 1203 | if (!skb_kill_datagram(sk, skb, flags)) |
| 1201 | UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1204 | UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
| 1202 | release_sock(sk); | 1205 | unlock_sock_fast(sk, slow); |
| 1203 | 1206 | ||
| 1204 | if (noblock) | 1207 | if (noblock) |
| 1205 | return -EAGAIN; | 1208 | return -EAGAIN; |
| @@ -1217,6 +1220,7 @@ int udp_disconnect(struct sock *sk, int flags) | |||
| 1217 | sk->sk_state = TCP_CLOSE; | 1220 | sk->sk_state = TCP_CLOSE; |
| 1218 | inet->inet_daddr = 0; | 1221 | inet->inet_daddr = 0; |
| 1219 | inet->inet_dport = 0; | 1222 | inet->inet_dport = 0; |
| 1223 | sock_rps_save_rxhash(sk, 0); | ||
| 1220 | sk->sk_bound_dev_if = 0; | 1224 | sk->sk_bound_dev_if = 0; |
| 1221 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | 1225 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
| 1222 | inet_reset_saddr(sk); | 1226 | inet_reset_saddr(sk); |
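
sock_rps_save_rxhash() records a packet's flow hash on the socket so Receive Packet Steering can direct later packets of the same flow to the CPU where the application consumes them; a disconnect ends that flow, so the cached hash is reset to 0 here. The matching save happens on the receive side (later in this file) and is deliberately limited to connected sockets, avoiding a dirtied cache line on sockets that see many flows:

    if (inet_sk(sk)->inet_daddr)                /* connected socket? */
            sock_rps_save_rxhash(sk, skb->rxhash);

RPS itself is enabled per RX queue, e.g. via /sys/class/net/<dev>/queues/rx-0/rps_cpus.
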
| @@ -1256,10 +1260,57 @@ void udp_lib_unhash(struct sock *sk) | |||
| 1256 | } | 1260 | } |
| 1257 | EXPORT_SYMBOL(udp_lib_unhash); | 1261 | EXPORT_SYMBOL(udp_lib_unhash); |
| 1258 | 1262 | ||
| 1263 | /* | ||
| 1264 | * inet_rcv_saddr was changed, we must rehash secondary hash | ||
| 1265 | */ | ||
| 1266 | void udp_lib_rehash(struct sock *sk, u16 newhash) | ||
| 1267 | { | ||
| 1268 | if (sk_hashed(sk)) { | ||
| 1269 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | ||
| 1270 | struct udp_hslot *hslot, *hslot2, *nhslot2; | ||
| 1271 | |||
| 1272 | hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); | ||
| 1273 | nhslot2 = udp_hashslot2(udptable, newhash); | ||
| 1274 | udp_sk(sk)->udp_portaddr_hash = newhash; | ||
| 1275 | if (hslot2 != nhslot2) { | ||
| 1276 | hslot = udp_hashslot(udptable, sock_net(sk), | ||
| 1277 | udp_sk(sk)->udp_port_hash); | ||
| 1278 | /* we must lock primary chain too */ | ||
| 1279 | spin_lock_bh(&hslot->lock); | ||
| 1280 | |||
| 1281 | spin_lock(&hslot2->lock); | ||
| 1282 | hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); | ||
| 1283 | hslot2->count--; | ||
| 1284 | spin_unlock(&hslot2->lock); | ||
| 1285 | |||
| 1286 | spin_lock(&nhslot2->lock); | ||
| 1287 | hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, | ||
| 1288 | &nhslot2->head); | ||
| 1289 | nhslot2->count++; | ||
| 1290 | spin_unlock(&nhslot2->lock); | ||
| 1291 | |||
| 1292 | spin_unlock_bh(&hslot->lock); | ||
| 1293 | } | ||
| 1294 | } | ||
| 1295 | } | ||
| 1296 | EXPORT_SYMBOL(udp_lib_rehash); | ||
| 1297 | |||
| 1298 | static void udp_v4_rehash(struct sock *sk) | ||
| 1299 | { | ||
| 1300 | u16 new_hash = udp4_portaddr_hash(sock_net(sk), | ||
| 1301 | inet_sk(sk)->inet_rcv_saddr, | ||
| 1302 | inet_sk(sk)->inet_num); | ||
| 1303 | udp_lib_rehash(sk, new_hash); | ||
| 1304 | } | ||
| 1305 | |||
| 1259 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 1306 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
| 1260 | { | 1307 | { |
| 1261 | int rc = sock_queue_rcv_skb(sk, skb); | 1308 | int rc; |
| 1262 | 1309 | ||
| 1310 | if (inet_sk(sk)->inet_daddr) | ||
| 1311 | sock_rps_save_rxhash(sk, skb->rxhash); | ||
| 1312 | |||
| 1313 | rc = ip_queue_rcv_skb(sk, skb); | ||
| 1263 | if (rc < 0) { | 1314 | if (rc < 0) { |
| 1264 | int is_udplite = IS_UDPLITE(sk); | 1315 | int is_udplite = IS_UDPLITE(sk); |
| 1265 | 1316 | ||
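
udp_lib_rehash() exists because the secondary hash keys on (local address, port): if inet_rcv_saddr changes after the socket is already hashed, the socket must migrate between hash2 chains, with the primary chain locked as well so concurrent lookups stay consistent. The typical trigger, presumably via the datagram connect path invoking the new .rehash hook, is connect() on a socket bound to the wildcard address, which fixes a concrete source address. A userspace sequence that would exercise it (addresses are illustrative):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    struct sockaddr_in local = {
            .sin_family      = AF_INET,
            .sin_addr.s_addr = htonl(INADDR_ANY),   /* rcv_saddr is 0.0.0.0 */
            .sin_port        = htons(5353),
    };
    struct sockaddr_in peer = {
            .sin_family      = AF_INET,
            .sin_addr.s_addr = inet_addr("192.0.2.1"),
            .sin_port        = htons(53),
    };

    bind(fd, (struct sockaddr *)&local, sizeof(local));
    /* connect() selects a route and a source address; rcv_saddr changes,
     * so the (addr, port) secondary hash must be recomputed */
    connect(fd, (struct sockaddr *)&peer, sizeof(peer));
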
| @@ -1367,13 +1418,19 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 1367 | goto drop; | 1418 | goto drop; |
| 1368 | } | 1419 | } |
| 1369 | 1420 | ||
| 1421 | |||
| 1422 | if (sk_rcvqueues_full(sk, skb)) | ||
| 1423 | goto drop; | ||
| 1424 | |||
| 1370 | rc = 0; | 1425 | rc = 0; |
| 1371 | 1426 | ||
| 1372 | bh_lock_sock(sk); | 1427 | bh_lock_sock(sk); |
| 1373 | if (!sock_owned_by_user(sk)) | 1428 | if (!sock_owned_by_user(sk)) |
| 1374 | rc = __udp_queue_rcv_skb(sk, skb); | 1429 | rc = __udp_queue_rcv_skb(sk, skb); |
| 1375 | else | 1430 | else if (sk_add_backlog(sk, skb)) { |
| 1376 | sk_add_backlog(sk, skb); | 1431 | bh_unlock_sock(sk); |
| 1432 | goto drop; | ||
| 1433 | } | ||
| 1377 | bh_unlock_sock(sk); | 1434 | bh_unlock_sock(sk); |
| 1378 | 1435 | ||
| 1379 | return rc; | 1436 | return rc; |
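
Two layers of flood protection are added here: sk_rcvqueues_full() drops a datagram early once the receive queue plus backlog already exceed the socket's budget, and sk_add_backlog(), which now enforces the same limit internally and returns nonzero on overflow, lets the caller drop instead of queueing without bound while the socket is owned by user context. The check is roughly (a sketch, not the exact implementation):

    static inline bool sk_rcvqueues_full(const struct sock *sk,
                                         const struct sk_buff *skb)
    {
            unsigned int qsize = sk->sk_backlog.len +
                                 atomic_read(&sk->sk_rmem_alloc);
            return qsize > sk->sk_rcvbuf;
    }

Before this, a burst arriving while the application held the socket lock could grow the backlog arbitrarily.
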
| @@ -1525,6 +1582,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, | |||
| 1525 | 1582 | ||
| 1526 | uh = udp_hdr(skb); | 1583 | uh = udp_hdr(skb); |
| 1527 | ulen = ntohs(uh->len); | 1584 | ulen = ntohs(uh->len); |
| 1585 | saddr = ip_hdr(skb)->saddr; | ||
| 1586 | daddr = ip_hdr(skb)->daddr; | ||
| 1587 | |||
| 1528 | if (ulen > skb->len) | 1588 | if (ulen > skb->len) |
| 1529 | goto short_packet; | 1589 | goto short_packet; |
| 1530 | 1590 | ||
| @@ -1538,9 +1598,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, | |||
| 1538 | if (udp4_csum_init(skb, uh, proto)) | 1598 | if (udp4_csum_init(skb, uh, proto)) |
| 1539 | goto csum_error; | 1599 | goto csum_error; |
| 1540 | 1600 | ||
| 1541 | saddr = ip_hdr(skb)->saddr; | ||
| 1542 | daddr = ip_hdr(skb)->daddr; | ||
| 1543 | |||
| 1544 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1601 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
| 1545 | return __udp4_lib_mcast_deliver(net, skb, uh, | 1602 | return __udp4_lib_mcast_deliver(net, skb, uh, |
| 1546 | saddr, daddr, udptable); | 1603 | saddr, daddr, udptable); |
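
Reading saddr and daddr before the length and checksum validation matters because the error paths reference them: a packet failing those checks previously reached the diagnostics with both variables still uninitialized. The short_packet label logs, approximately:

    LIMIT_NETDEBUG(KERN_DEBUG
                   "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
                   proto == IPPROTO_UDPLITE ? "Lite" : "",
                   &saddr, ntohs(uh->source), ulen, skb->len,
                   &daddr, ntohs(uh->dest));

so the move guarantees the logged addresses are the real ones from the IP header.
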
| @@ -1613,9 +1670,9 @@ int udp_rcv(struct sk_buff *skb) | |||
| 1613 | 1670 | ||
| 1614 | void udp_destroy_sock(struct sock *sk) | 1671 | void udp_destroy_sock(struct sock *sk) |
| 1615 | { | 1672 | { |
| 1616 | lock_sock(sk); | 1673 | bool slow = lock_sock_fast(sk); |
| 1617 | udp_flush_pending_frames(sk); | 1674 | udp_flush_pending_frames(sk); |
| 1618 | release_sock(sk); | 1675 | unlock_sock_fast(sk, slow); |
| 1619 | } | 1676 | } |
| 1620 | 1677 | ||
| 1621 | /* | 1678 | /* |
| @@ -1674,8 +1731,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
| 1674 | return -ENOPROTOOPT; | 1731 | return -ENOPROTOOPT; |
| 1675 | if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ | 1732 | if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ |
| 1676 | val = 8; | 1733 | val = 8; |
| 1677 | else if (val > USHORT_MAX) | 1734 | else if (val > USHRT_MAX) |
| 1678 | val = USHORT_MAX; | 1735 | val = USHRT_MAX; |
| 1679 | up->pcslen = val; | 1736 | up->pcslen = val; |
| 1680 | up->pcflag |= UDPLITE_SEND_CC; | 1737 | up->pcflag |= UDPLITE_SEND_CC; |
| 1681 | break; | 1738 | break; |
| @@ -1688,8 +1745,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
| 1688 | return -ENOPROTOOPT; | 1745 | return -ENOPROTOOPT; |
| 1689 | if (val != 0 && val < 8) /* Avoid silly minimal values. */ | 1746 | if (val != 0 && val < 8) /* Avoid silly minimal values. */ |
| 1690 | val = 8; | 1747 | val = 8; |
| 1691 | else if (val > USHORT_MAX) | 1748 | else if (val > USHRT_MAX) |
| 1692 | val = USHORT_MAX; | 1749 | val = USHRT_MAX; |
| 1693 | up->pcrlen = val; | 1750 | up->pcrlen = val; |
| 1694 | up->pcflag |= UDPLITE_RECV_CC; | 1751 | up->pcflag |= UDPLITE_RECV_CC; |
| 1695 | break; | 1752 | break; |
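
USHORT_MAX becomes USHRT_MAX as part of a tree-wide rename to match SHRT_MAX and friends; the clamping logic is untouched: a requested checksum coverage of 1..7 is raised to the 8-byte minimum (the UDP-Lite header itself must always be covered) and anything above 65535 is capped. From userspace, partial coverage is requested per socket, roughly:

    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef IPPROTO_UDPLITE
    #define IPPROTO_UDPLITE    136
    #endif
    #ifndef UDPLITE_SEND_CSCOV
    #define UDPLITE_SEND_CSCOV 10
    #define UDPLITE_RECV_CSCOV 11
    #endif

    int fd  = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
    int cov = 20;                       /* checksum the first 20 bytes */
    setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV, &cov, sizeof(cov));
    setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV, &cov, sizeof(cov));
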
| @@ -1829,6 +1886,7 @@ struct proto udp_prot = { | |||
| 1829 | .backlog_rcv = __udp_queue_rcv_skb, | 1886 | .backlog_rcv = __udp_queue_rcv_skb, |
| 1830 | .hash = udp_lib_hash, | 1887 | .hash = udp_lib_hash, |
| 1831 | .unhash = udp_lib_unhash, | 1888 | .unhash = udp_lib_unhash, |
| 1889 | .rehash = udp_v4_rehash, | ||
| 1832 | .get_port = udp_v4_get_port, | 1890 | .get_port = udp_v4_get_port, |
| 1833 | .memory_allocated = &udp_memory_allocated, | 1891 | .memory_allocated = &udp_memory_allocated, |
| 1834 | .sysctl_mem = sysctl_udp_mem, | 1892 | .sysctl_mem = sysctl_udp_mem, |
| @@ -2027,12 +2085,12 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { | |||
| 2027 | }, | 2085 | }, |
| 2028 | }; | 2086 | }; |
| 2029 | 2087 | ||
| 2030 | static int udp4_proc_init_net(struct net *net) | 2088 | static int __net_init udp4_proc_init_net(struct net *net) |
| 2031 | { | 2089 | { |
| 2032 | return udp_proc_register(net, &udp4_seq_afinfo); | 2090 | return udp_proc_register(net, &udp4_seq_afinfo); |
| 2033 | } | 2091 | } |
| 2034 | 2092 | ||
| 2035 | static void udp4_proc_exit_net(struct net *net) | 2093 | static void __net_exit udp4_proc_exit_net(struct net *net) |
| 2036 | { | 2094 | { |
| 2037 | udp_proc_unregister(net, &udp4_seq_afinfo); | 2095 | udp_proc_unregister(net, &udp4_seq_afinfo); |
| 2038 | } | 2096 | } |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 66f79513f4a5..ab76aa928fa9 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
| @@ -58,6 +58,7 @@ struct proto udplite_prot = { | |||
| 58 | .compat_getsockopt = compat_udp_getsockopt, | 58 | .compat_getsockopt = compat_udp_getsockopt, |
| 59 | #endif | 59 | #endif |
| 60 | }; | 60 | }; |
| 61 | EXPORT_SYMBOL(udplite_prot); | ||
| 61 | 62 | ||
| 62 | static struct inet_protosw udplite4_protosw = { | 63 | static struct inet_protosw udplite4_protosw = { |
| 63 | .type = SOCK_DGRAM, | 64 | .type = SOCK_DGRAM, |
| @@ -81,12 +82,12 @@ static struct udp_seq_afinfo udplite4_seq_afinfo = { | |||
| 81 | }, | 82 | }, |
| 82 | }; | 83 | }; |
| 83 | 84 | ||
| 84 | static int udplite4_proc_init_net(struct net *net) | 85 | static int __net_init udplite4_proc_init_net(struct net *net) |
| 85 | { | 86 | { |
| 86 | return udp_proc_register(net, &udplite4_seq_afinfo); | 87 | return udp_proc_register(net, &udplite4_seq_afinfo); |
| 87 | } | 88 | } |
| 88 | 89 | ||
| 89 | static void udplite4_proc_exit_net(struct net *net) | 90 | static void __net_exit udplite4_proc_exit_net(struct net *net) |
| 90 | { | 91 | { |
| 91 | udp_proc_unregister(net, &udplite4_seq_afinfo); | 92 | udp_proc_unregister(net, &udplite4_seq_afinfo); |
| 92 | } | 93 | } |
| @@ -127,5 +128,3 @@ out_unregister_proto: | |||
| 127 | out_register_err: | 128 | out_register_err: |
| 128 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); | 129 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); |
| 129 | } | 130 | } |
| 130 | |||
| 131 | EXPORT_SYMBOL(udplite_prot); | ||
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f9f922a0ba88..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | * | 9 | * |
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #include <linux/slab.h> | ||
| 12 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 13 | #include <linux/string.h> | 14 | #include <linux/string.h> |
| 14 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> |
| @@ -26,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) | |||
| 26 | if (skb_dst(skb) == NULL) { | 27 | if (skb_dst(skb) == NULL) { |
| 27 | const struct iphdr *iph = ip_hdr(skb); | 28 | const struct iphdr *iph = ip_hdr(skb); |
| 28 | 29 | ||
| 29 | if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, | 30 | if (ip_route_input_noref(skb, iph->daddr, iph->saddr, |
| 30 | skb->dev)) | 31 | iph->tos, skb->dev)) |
| 31 | goto drop; | 32 | goto drop; |
| 32 | } | 33 | } |
| 33 | return dst_input(skb); | 34 | return dst_input(skb); |
| @@ -60,7 +61,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) | |||
| 60 | iph->tot_len = htons(skb->len); | 61 | iph->tot_len = htons(skb->len); |
| 61 | ip_send_check(iph); | 62 | ip_send_check(iph); |
| 62 | 63 | ||
| 63 | NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, | 64 | NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, |
| 64 | xfrm4_rcv_encap_finish); | 65 | xfrm4_rcv_encap_finish); |
| 65 | return 0; | 66 | return 0; |
| 66 | } | 67 | } |
| @@ -162,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb) | |||
| 162 | { | 163 | { |
| 163 | return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); | 164 | return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); |
| 164 | } | 165 | } |
| 165 | |||
| 166 | EXPORT_SYMBOL(xfrm4_rcv); | 166 | EXPORT_SYMBOL(xfrm4_rcv); |
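
Two independent cleanups in this file: NF_HOOK()'s first argument is a netfilter protocol family, so NFPROTO_IPV4 (numerically equal to PF_INET) is the precise constant even though the value does not change, and ip_route_input_noref() performs the same lookup as ip_route_input() but skips taking a reference on the resulting dst, which is safe because the receive path holds it only under RCU for the duration of this packet. The NFPROTO space looks like:

    /* include/linux/netfilter.h; values track the PF_* numbers */
    enum {
            NFPROTO_UNSPEC =  0,
            NFPROTO_IPV4   =  2,
            NFPROTO_ARP    =  3,
            NFPROTO_BRIDGE =  7,
            NFPROTO_IPV6   = 10,
            NFPROTO_DECNET = 12,
    };
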
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 3444f3b34eca..6f368413eb0e 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> | 4 | * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> |
| 5 | */ | 5 | */ |
| 6 | 6 | ||
| 7 | #include <linux/gfp.h> | ||
| 7 | #include <linux/init.h> | 8 | #include <linux/init.h> |
| 8 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
| 9 | #include <linux/module.h> | 10 | #include <linux/module.h> |
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index c908bd99bcba..571aa96a175c 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c | |||
| @@ -86,7 +86,7 @@ static int xfrm4_output_finish(struct sk_buff *skb) | |||
| 86 | 86 | ||
| 87 | int xfrm4_output(struct sk_buff *skb) | 87 | int xfrm4_output(struct sk_buff *skb) |
| 88 | { | 88 | { |
| 89 | return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, | 89 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, |
| 90 | NULL, skb_dst(skb)->dev, xfrm4_output_finish, | 90 | NULL, skb_dst(skb)->dev, xfrm4_output_finish, |
| 91 | !(IPCB(skb)->flags & IPSKB_REROUTED)); | 91 | !(IPCB(skb)->flags & IPSKB_REROUTED)); |
| 92 | } | 92 | } |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d63c1cd..a580349f0b8a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
| @@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | |||
| 37 | fl.fl4_src = saddr->a4; | 37 | fl.fl4_src = saddr->a4; |
| 38 | 38 | ||
| 39 | err = __ip_route_output_key(net, &rt, &fl); | 39 | err = __ip_route_output_key(net, &rt, &fl); |
| 40 | dst = &rt->u.dst; | 40 | dst = &rt->dst; |
| 41 | if (err) | 41 | if (err) |
| 42 | dst = ERR_PTR(err); | 42 | dst = ERR_PTR(err); |
| 43 | return dst; | 43 | return dst; |
| @@ -59,30 +59,9 @@ static int xfrm4_get_saddr(struct net *net, | |||
| 59 | return 0; | 59 | return 0; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static struct dst_entry * | ||
| 63 | __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | ||
| 64 | { | ||
| 65 | struct dst_entry *dst; | ||
| 66 | |||
| 67 | read_lock_bh(&policy->lock); | ||
| 68 | for (dst = policy->bundles; dst; dst = dst->next) { | ||
| 69 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | ||
| 70 | if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ | ||
| 71 | xdst->u.rt.fl.fl4_dst == fl->fl4_dst && | ||
| 72 | xdst->u.rt.fl.fl4_src == fl->fl4_src && | ||
| 73 | xdst->u.rt.fl.fl4_tos == fl->fl4_tos && | ||
| 74 | xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { | ||
| 75 | dst_clone(dst); | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | } | ||
| 79 | read_unlock_bh(&policy->lock); | ||
| 80 | return dst; | ||
| 81 | } | ||
| 82 | |||
| 83 | static int xfrm4_get_tos(struct flowi *fl) | 62 | static int xfrm4_get_tos(struct flowi *fl) |
| 84 | { | 63 | { |
| 85 | return fl->fl4_tos; | 64 | return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ |
| 86 | } | 65 | } |
| 87 | 66 | ||
| 88 | static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | 67 | static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, |
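
Masking with IPTOS_RT_MASK keeps the two low ECN bits out of routing and bundle decisions; without it, packets of one flow differing only in ECN marking (Not-ECT, ECT(0), ECT(1), CE) would hash to different cached bundles. The mask is the TOS mask with ECN stripped:

    #define IPTOS_TOS_MASK 0x1E                   /* <linux/ip.h> */
    #define IPTOS_RT_MASK  (IPTOS_TOS_MASK & ~3)  /* 0x1C: drop ECN bits */

    /* example: tos 0x0b & IPTOS_RT_MASK == 0x08 */
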
| @@ -91,11 +70,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | |||
| 91 | return 0; | 70 | return 0; |
| 92 | } | 71 | } |
| 93 | 72 | ||
| 94 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) | 73 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, |
| 74 | struct flowi *fl) | ||
| 95 | { | 75 | { |
| 96 | struct rtable *rt = (struct rtable *)xdst->route; | 76 | struct rtable *rt = (struct rtable *)xdst->route; |
| 97 | 77 | ||
| 98 | xdst->u.rt.fl = rt->fl; | 78 | xdst->u.rt.fl = *fl; |
| 99 | 79 | ||
| 100 | xdst->u.dst.dev = dev; | 80 | xdst->u.dst.dev = dev; |
| 101 | dev_hold(dev); | 81 | dev_hold(dev); |
| @@ -128,6 +108,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 128 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; | 108 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
| 129 | 109 | ||
| 130 | memset(fl, 0, sizeof(struct flowi)); | 110 | memset(fl, 0, sizeof(struct flowi)); |
| 111 | fl->mark = skb->mark; | ||
| 112 | |||
| 131 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | 113 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { |
| 132 | switch (iph->protocol) { | 114 | switch (iph->protocol) { |
| 133 | case IPPROTO_UDP: | 115 | case IPPROTO_UDP: |
| @@ -258,7 +240,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
| 258 | .dst_ops = &xfrm4_dst_ops, | 240 | .dst_ops = &xfrm4_dst_ops, |
| 259 | .dst_lookup = xfrm4_dst_lookup, | 241 | .dst_lookup = xfrm4_dst_lookup, |
| 260 | .get_saddr = xfrm4_get_saddr, | 242 | .get_saddr = xfrm4_get_saddr, |
| 261 | .find_bundle = __xfrm4_find_bundle, | ||
| 262 | .decode_session = _decode_session4, | 243 | .decode_session = _decode_session4, |
| 263 | .get_tos = xfrm4_get_tos, | 244 | .get_tos = xfrm4_get_tos, |
| 264 | .init_path = xfrm4_init_path, | 245 | .init_path = xfrm4_init_path, |
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1ef1366a0a03..47947624eccc 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
| @@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x) | |||
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | static void | 23 | static void |
| 24 | __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, | 24 | __xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) |
| 25 | struct xfrm_tmpl *tmpl, | 25 | { |
| 26 | xfrm_address_t *daddr, xfrm_address_t *saddr) | 26 | sel->daddr.a4 = fl->fl4_dst; |
| 27 | sel->saddr.a4 = fl->fl4_src; | ||
| 28 | sel->dport = xfrm_flowi_dport(fl); | ||
| 29 | sel->dport_mask = htons(0xffff); | ||
| 30 | sel->sport = xfrm_flowi_sport(fl); | ||
| 31 | sel->sport_mask = htons(0xffff); | ||
| 32 | sel->family = AF_INET; | ||
| 33 | sel->prefixlen_d = 32; | ||
| 34 | sel->prefixlen_s = 32; | ||
| 35 | sel->proto = fl->proto; | ||
| 36 | sel->ifindex = fl->oif; | ||
| 37 | } | ||
| 38 | |||
| 39 | static void | ||
| 40 | xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, | ||
| 41 | xfrm_address_t *daddr, xfrm_address_t *saddr) | ||
| 27 | { | 42 | { |
| 28 | x->sel.daddr.a4 = fl->fl4_dst; | ||
| 29 | x->sel.saddr.a4 = fl->fl4_src; | ||
| 30 | x->sel.dport = xfrm_flowi_dport(fl); | ||
| 31 | x->sel.dport_mask = htons(0xffff); | ||
| 32 | x->sel.sport = xfrm_flowi_sport(fl); | ||
| 33 | x->sel.sport_mask = htons(0xffff); | ||
| 34 | x->sel.family = AF_INET; | ||
| 35 | x->sel.prefixlen_d = 32; | ||
| 36 | x->sel.prefixlen_s = 32; | ||
| 37 | x->sel.proto = fl->proto; | ||
| 38 | x->sel.ifindex = fl->oif; | ||
| 39 | x->id = tmpl->id; | 43 | x->id = tmpl->id; |
| 40 | if (x->id.daddr.a4 == 0) | 44 | if (x->id.daddr.a4 == 0) |
| 41 | x->id.daddr.a4 = daddr->a4; | 45 | x->id.daddr.a4 = daddr->a4; |
| @@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { | |||
| 70 | .owner = THIS_MODULE, | 74 | .owner = THIS_MODULE, |
| 71 | .init_flags = xfrm4_init_flags, | 75 | .init_flags = xfrm4_init_flags, |
| 72 | .init_tempsel = __xfrm4_init_tempsel, | 76 | .init_tempsel = __xfrm4_init_tempsel, |
| 77 | .init_temprop = xfrm4_init_temprop, | ||
| 73 | .output = xfrm4_output, | 78 | .output = xfrm4_output, |
| 74 | .extract_input = xfrm4_extract_input, | 79 | .extract_input = xfrm4_extract_input, |
| 75 | .extract_output = xfrm4_extract_output, | 80 | .extract_output = xfrm4_extract_output, |
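
Splitting __xfrm4_init_tempsel() separates what is derived from the flow (the temporary selector) from what is copied out of the policy template (the SA id, addresses, mode and reqid). A plausible motivation is letting the generic xfrm code build the selector for one address family while the template properties describe the other, as in inter-family tunnels; the caller side would then look roughly like this (the split call sequence on the xfrm_state side is an assumption here):

    /* sketch: generic temporary-state setup after the split */
    afinfo->init_tempsel(&x->sel, fl);           /* flow -> selector   */
    afinfo->init_temprop(x, tmpl, daddr, saddr); /* tmpl -> id & props */
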
