diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 10 | ||||
-rw-r--r-- | net/ipv4/Makefile | 3 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 19 | ||||
-rw-r--r-- | net/ipv4/arp.c | 9 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 21 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 101 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 39 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 5 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_MASQUERADE.c | 14 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 2 | ||||
-rw-r--r-- | net/ipv4/proc.c | 5 | ||||
-rw-r--r-- | net/ipv4/route.c | 75 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 35 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 30 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 1 | ||||
-rw-r--r-- | net/ipv4/udp.c | 1090 | ||||
-rw-r--r-- | net/ipv4/udp_ipv4.c | 1134 | ||||
-rw-r--r-- | net/ipv4/udplite_ipv4.c (renamed from net/ipv4/udplite.c) | 0 |
24 files changed, 1381 insertions, 1242 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9c7e5ffb223d..5098fd2ff4d0 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -632,5 +632,15 @@ config TCP_MD5SIG | |||
632 | 632 | ||
633 | If unsure, say N. | 633 | If unsure, say N. |
634 | 634 | ||
635 | config IP_UDPLITE | ||
636 | bool "IP: UDP-Lite Protocol (RFC 3828)" | ||
637 | default n | ||
638 | ---help--- | ||
639 | UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length | ||
640 | checksum. Read <file:Documentation/networking/udplite.txt> for | ||
641 | details. | ||
642 | |||
643 | If unsure, say N. | ||
644 | |||
635 | source "net/ipv4/ipvs/Kconfig" | 645 | source "net/ipv4/ipvs/Kconfig" |
636 | 646 | ||
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9ebc..d5226241d5ed 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ | |||
8 | inet_timewait_sock.o inet_connection_sock.o \ | 8 | inet_timewait_sock.o inet_connection_sock.o \ |
9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ | 9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ |
10 | tcp_minisocks.o tcp_cong.o \ | 10 | tcp_minisocks.o tcp_cong.o \ |
11 | datagram.o raw.o udp.o udplite.o \ | 11 | datagram.o raw.o udp.o udp_ipv4.o \ |
12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ | 12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ |
13 | fib_frontend.o fib_semantics.o \ | 13 | fib_frontend.o fib_semantics.o \ |
14 | inet_fragment.o | 14 | inet_fragment.o |
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | |||
49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
50 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o | 50 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o |
51 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o | 51 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o |
52 | obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o | ||
52 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | 53 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o |
53 | 54 | ||
54 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 55 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 09ca5293d08f..67260c0eaaa8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -784,6 +784,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
784 | { | 784 | { |
785 | struct sock *sk = sock->sk; | 785 | struct sock *sk = sock->sk; |
786 | int err = 0; | 786 | int err = 0; |
787 | struct net *net = sk->sk_net; | ||
787 | 788 | ||
788 | switch (cmd) { | 789 | switch (cmd) { |
789 | case SIOCGSTAMP: | 790 | case SIOCGSTAMP: |
@@ -795,12 +796,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
795 | case SIOCADDRT: | 796 | case SIOCADDRT: |
796 | case SIOCDELRT: | 797 | case SIOCDELRT: |
797 | case SIOCRTMSG: | 798 | case SIOCRTMSG: |
798 | err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg); | 799 | err = ip_rt_ioctl(net, cmd, (void __user *)arg); |
799 | break; | 800 | break; |
800 | case SIOCDARP: | 801 | case SIOCDARP: |
801 | case SIOCGARP: | 802 | case SIOCGARP: |
802 | case SIOCSARP: | 803 | case SIOCSARP: |
803 | err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg); | 804 | err = arp_ioctl(net, cmd, (void __user *)arg); |
804 | break; | 805 | break; |
805 | case SIOCGIFADDR: | 806 | case SIOCGIFADDR: |
806 | case SIOCSIFADDR: | 807 | case SIOCSIFADDR: |
@@ -813,7 +814,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
813 | case SIOCSIFPFLAGS: | 814 | case SIOCSIFPFLAGS: |
814 | case SIOCGIFPFLAGS: | 815 | case SIOCGIFPFLAGS: |
815 | case SIOCSIFFLAGS: | 816 | case SIOCSIFFLAGS: |
816 | err = devinet_ioctl(cmd, (void __user *)arg); | 817 | err = devinet_ioctl(net, cmd, (void __user *)arg); |
817 | break; | 818 | break; |
818 | default: | 819 | default: |
819 | if (sk->sk_prot->ioctl) | 820 | if (sk->sk_prot->ioctl) |
@@ -1316,15 +1317,18 @@ static int __init init_ipv4_mibs(void) | |||
1316 | if (snmp_mib_init((void **)udp_statistics, | 1317 | if (snmp_mib_init((void **)udp_statistics, |
1317 | sizeof(struct udp_mib)) < 0) | 1318 | sizeof(struct udp_mib)) < 0) |
1318 | goto err_udp_mib; | 1319 | goto err_udp_mib; |
1320 | #ifdef CONFIG_IP_UDPLITE | ||
1319 | if (snmp_mib_init((void **)udplite_statistics, | 1321 | if (snmp_mib_init((void **)udplite_statistics, |
1320 | sizeof(struct udp_mib)) < 0) | 1322 | sizeof(struct udp_mib)) < 0) |
1321 | goto err_udplite_mib; | 1323 | goto err_udplite_mib; |
1322 | 1324 | #endif | |
1323 | tcp_mib_init(); | 1325 | tcp_mib_init(); |
1324 | 1326 | ||
1325 | return 0; | 1327 | return 0; |
1326 | 1328 | ||
1329 | #ifdef CONFIG_IP_UDPLITE | ||
1327 | err_udplite_mib: | 1330 | err_udplite_mib: |
1331 | #endif | ||
1328 | snmp_mib_free((void **)udp_statistics); | 1332 | snmp_mib_free((void **)udp_statistics); |
1329 | err_udp_mib: | 1333 | err_udp_mib: |
1330 | snmp_mib_free((void **)tcp_statistics); | 1334 | snmp_mib_free((void **)tcp_statistics); |
@@ -1414,7 +1418,7 @@ static int __init inet_init(void) | |||
1414 | 1418 | ||
1415 | ip_init(); | 1419 | ip_init(); |
1416 | 1420 | ||
1417 | tcp_v4_init(&inet_family_ops); | 1421 | tcp_v4_init(); |
1418 | 1422 | ||
1419 | /* Setup TCP slab cache for open requests. */ | 1423 | /* Setup TCP slab cache for open requests. */ |
1420 | tcp_init(); | 1424 | tcp_init(); |
@@ -1422,14 +1426,17 @@ static int __init inet_init(void) | |||
1422 | /* Setup UDP memory threshold */ | 1426 | /* Setup UDP memory threshold */ |
1423 | udp_init(); | 1427 | udp_init(); |
1424 | 1428 | ||
1429 | #ifdef CONFIG_IP_UDPLITE | ||
1425 | /* Add UDP-Lite (RFC 3828) */ | 1430 | /* Add UDP-Lite (RFC 3828) */ |
1426 | udplite4_register(); | 1431 | udplite4_register(); |
1432 | #endif | ||
1427 | 1433 | ||
1428 | /* | 1434 | /* |
1429 | * Set the ICMP layer up | 1435 | * Set the ICMP layer up |
1430 | */ | 1436 | */ |
1431 | 1437 | ||
1432 | icmp_init(&inet_family_ops); | 1438 | if (icmp_init() < 0) |
1439 | panic("Failed to create the ICMP control socket.\n"); | ||
1433 | 1440 | ||
1434 | /* | 1441 | /* |
1435 | * Initialise the multicast router | 1442 | * Initialise the multicast router |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8e17f65f4002..69e80bd9774a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
570 | * Allocate a buffer | 570 | * Allocate a buffer |
571 | */ | 571 | */ |
572 | 572 | ||
573 | skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) | 573 | skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); |
574 | + LL_RESERVED_SPACE(dev), GFP_ATOMIC); | ||
575 | if (skb == NULL) | 574 | if (skb == NULL) |
576 | return NULL; | 575 | return NULL; |
577 | 576 | ||
578 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 577 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
579 | skb_reset_network_header(skb); | 578 | skb_reset_network_header(skb); |
580 | arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); | 579 | arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); |
581 | skb->dev = dev; | 580 | skb->dev = dev; |
582 | skb->protocol = htons(ETH_P_ARP); | 581 | skb->protocol = htons(ETH_P_ARP); |
583 | if (src_hw == NULL) | 582 | if (src_hw == NULL) |
@@ -916,9 +915,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, | |||
916 | goto freeskb; | 915 | goto freeskb; |
917 | 916 | ||
918 | /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ | 917 | /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ |
919 | if (!pskb_may_pull(skb, (sizeof(struct arphdr) + | 918 | if (!pskb_may_pull(skb, arp_hdr_len(dev))) |
920 | (2 * dev->addr_len) + | ||
921 | (2 * sizeof(u32))))) | ||
922 | goto freeskb; | 919 | goto freeskb; |
923 | 920 | ||
924 | arp = arp_hdr(skb); | 921 | arp = arp_hdr(skb); |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 87490f7bb0f7..4a10dbbbe0a1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg | |||
446 | 446 | ||
447 | ASSERT_RTNL(); | 447 | ASSERT_RTNL(); |
448 | 448 | ||
449 | if (net != &init_net) | ||
450 | return -EINVAL; | ||
451 | |||
452 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); | 449 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); |
453 | if (err < 0) | 450 | if (err < 0) |
454 | goto errout; | 451 | goto errout; |
@@ -560,9 +557,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg | |||
560 | 557 | ||
561 | ASSERT_RTNL(); | 558 | ASSERT_RTNL(); |
562 | 559 | ||
563 | if (net != &init_net) | ||
564 | return -EINVAL; | ||
565 | |||
566 | ifa = rtm_to_ifaddr(net, nlh); | 560 | ifa = rtm_to_ifaddr(net, nlh); |
567 | if (IS_ERR(ifa)) | 561 | if (IS_ERR(ifa)) |
568 | return PTR_ERR(ifa); | 562 | return PTR_ERR(ifa); |
@@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr) | |||
595 | } | 589 | } |
596 | 590 | ||
597 | 591 | ||
598 | int devinet_ioctl(unsigned int cmd, void __user *arg) | 592 | int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) |
599 | { | 593 | { |
600 | struct ifreq ifr; | 594 | struct ifreq ifr; |
601 | struct sockaddr_in sin_orig; | 595 | struct sockaddr_in sin_orig; |
@@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) | |||
624 | *colon = 0; | 618 | *colon = 0; |
625 | 619 | ||
626 | #ifdef CONFIG_KMOD | 620 | #ifdef CONFIG_KMOD |
627 | dev_load(&init_net, ifr.ifr_name); | 621 | dev_load(net, ifr.ifr_name); |
628 | #endif | 622 | #endif |
629 | 623 | ||
630 | switch (cmd) { | 624 | switch (cmd) { |
@@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) | |||
665 | rtnl_lock(); | 659 | rtnl_lock(); |
666 | 660 | ||
667 | ret = -ENODEV; | 661 | ret = -ENODEV; |
668 | if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) | 662 | if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) |
669 | goto done; | 663 | goto done; |
670 | 664 | ||
671 | if (colon) | 665 | if (colon) |
@@ -878,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) | |||
878 | { | 872 | { |
879 | __be32 addr = 0; | 873 | __be32 addr = 0; |
880 | struct in_device *in_dev; | 874 | struct in_device *in_dev; |
875 | struct net *net = dev->nd_net; | ||
881 | 876 | ||
882 | rcu_read_lock(); | 877 | rcu_read_lock(); |
883 | in_dev = __in_dev_get_rcu(dev); | 878 | in_dev = __in_dev_get_rcu(dev); |
@@ -906,7 +901,7 @@ no_in_dev: | |||
906 | */ | 901 | */ |
907 | read_lock(&dev_base_lock); | 902 | read_lock(&dev_base_lock); |
908 | rcu_read_lock(); | 903 | rcu_read_lock(); |
909 | for_each_netdev(&init_net, dev) { | 904 | for_each_netdev(net, dev) { |
910 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) | 905 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) |
911 | continue; | 906 | continue; |
912 | 907 | ||
@@ -1045,9 +1040,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1045 | struct net_device *dev = ptr; | 1040 | struct net_device *dev = ptr; |
1046 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1041 | struct in_device *in_dev = __in_dev_get_rtnl(dev); |
1047 | 1042 | ||
1048 | if (dev->nd_net != &init_net) | ||
1049 | return NOTIFY_DONE; | ||
1050 | |||
1051 | ASSERT_RTNL(); | 1043 | ASSERT_RTNL(); |
1052 | 1044 | ||
1053 | if (!in_dev) { | 1045 | if (!in_dev) { |
@@ -1173,9 +1165,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
1173 | struct in_ifaddr *ifa; | 1165 | struct in_ifaddr *ifa; |
1174 | int s_ip_idx, s_idx = cb->args[0]; | 1166 | int s_ip_idx, s_idx = cb->args[0]; |
1175 | 1167 | ||
1176 | if (net != &init_net) | ||
1177 | return 0; | ||
1178 | |||
1179 | s_ip_idx = ip_idx = cb->args[1]; | 1168 | s_ip_idx = ip_idx = cb->args[1]; |
1180 | idx = 0; | 1169 | idx = 0; |
1181 | for_each_netdev(net, dev) { | 1170 | for_each_netdev(net, dev) { |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a13c074dac09..cee77d606fbe 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -229,14 +229,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; | |||
229 | * | 229 | * |
230 | * On SMP we have one ICMP socket per-cpu. | 230 | * On SMP we have one ICMP socket per-cpu. |
231 | */ | 231 | */ |
232 | static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; | 232 | static struct sock *icmp_sk(struct net *net) |
233 | #define icmp_socket __get_cpu_var(__icmp_socket) | 233 | { |
234 | return net->ipv4.icmp_sk[smp_processor_id()]; | ||
235 | } | ||
234 | 236 | ||
235 | static inline int icmp_xmit_lock(void) | 237 | static inline int icmp_xmit_lock(struct sock *sk) |
236 | { | 238 | { |
237 | local_bh_disable(); | 239 | local_bh_disable(); |
238 | 240 | ||
239 | if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { | 241 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { |
240 | /* This can happen if the output path signals a | 242 | /* This can happen if the output path signals a |
241 | * dst_link_failure() for an outgoing ICMP packet. | 243 | * dst_link_failure() for an outgoing ICMP packet. |
242 | */ | 244 | */ |
@@ -246,9 +248,9 @@ static inline int icmp_xmit_lock(void) | |||
246 | return 0; | 248 | return 0; |
247 | } | 249 | } |
248 | 250 | ||
249 | static inline void icmp_xmit_unlock(void) | 251 | static inline void icmp_xmit_unlock(struct sock *sk) |
250 | { | 252 | { |
251 | spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); | 253 | spin_unlock_bh(&sk->sk_lock.slock); |
252 | } | 254 | } |
253 | 255 | ||
254 | /* | 256 | /* |
@@ -346,19 +348,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, | |||
346 | static void icmp_push_reply(struct icmp_bxm *icmp_param, | 348 | static void icmp_push_reply(struct icmp_bxm *icmp_param, |
347 | struct ipcm_cookie *ipc, struct rtable *rt) | 349 | struct ipcm_cookie *ipc, struct rtable *rt) |
348 | { | 350 | { |
351 | struct sock *sk; | ||
349 | struct sk_buff *skb; | 352 | struct sk_buff *skb; |
350 | 353 | ||
351 | if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, | 354 | sk = icmp_sk(rt->u.dst.dev->nd_net); |
355 | if (ip_append_data(sk, icmp_glue_bits, icmp_param, | ||
352 | icmp_param->data_len+icmp_param->head_len, | 356 | icmp_param->data_len+icmp_param->head_len, |
353 | icmp_param->head_len, | 357 | icmp_param->head_len, |
354 | ipc, rt, MSG_DONTWAIT) < 0) | 358 | ipc, rt, MSG_DONTWAIT) < 0) |
355 | ip_flush_pending_frames(icmp_socket->sk); | 359 | ip_flush_pending_frames(sk); |
356 | else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { | 360 | else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
357 | struct icmphdr *icmph = icmp_hdr(skb); | 361 | struct icmphdr *icmph = icmp_hdr(skb); |
358 | __wsum csum = 0; | 362 | __wsum csum = 0; |
359 | struct sk_buff *skb1; | 363 | struct sk_buff *skb1; |
360 | 364 | ||
361 | skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { | 365 | skb_queue_walk(&sk->sk_write_queue, skb1) { |
362 | csum = csum_add(csum, skb1->csum); | 366 | csum = csum_add(csum, skb1->csum); |
363 | } | 367 | } |
364 | csum = csum_partial_copy_nocheck((void *)&icmp_param->data, | 368 | csum = csum_partial_copy_nocheck((void *)&icmp_param->data, |
@@ -366,7 +370,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
366 | icmp_param->head_len, csum); | 370 | icmp_param->head_len, csum); |
367 | icmph->checksum = csum_fold(csum); | 371 | icmph->checksum = csum_fold(csum); |
368 | skb->ip_summed = CHECKSUM_NONE; | 372 | skb->ip_summed = CHECKSUM_NONE; |
369 | ip_push_pending_frames(icmp_socket->sk); | 373 | ip_push_pending_frames(sk); |
370 | } | 374 | } |
371 | } | 375 | } |
372 | 376 | ||
@@ -376,16 +380,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
376 | 380 | ||
377 | static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | 381 | static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) |
378 | { | 382 | { |
379 | struct sock *sk = icmp_socket->sk; | ||
380 | struct inet_sock *inet = inet_sk(sk); | ||
381 | struct ipcm_cookie ipc; | 383 | struct ipcm_cookie ipc; |
382 | struct rtable *rt = (struct rtable *)skb->dst; | 384 | struct rtable *rt = (struct rtable *)skb->dst; |
385 | struct net *net = rt->u.dst.dev->nd_net; | ||
386 | struct sock *sk = icmp_sk(net); | ||
387 | struct inet_sock *inet = inet_sk(sk); | ||
383 | __be32 daddr; | 388 | __be32 daddr; |
384 | 389 | ||
385 | if (ip_options_echo(&icmp_param->replyopts, skb)) | 390 | if (ip_options_echo(&icmp_param->replyopts, skb)) |
386 | return; | 391 | return; |
387 | 392 | ||
388 | if (icmp_xmit_lock()) | 393 | if (icmp_xmit_lock(sk)) |
389 | return; | 394 | return; |
390 | 395 | ||
391 | icmp_param->data.icmph.checksum = 0; | 396 | icmp_param->data.icmph.checksum = 0; |
@@ -405,7 +410,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
405 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, | 410 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, |
406 | .proto = IPPROTO_ICMP }; | 411 | .proto = IPPROTO_ICMP }; |
407 | security_skb_classify_flow(skb, &fl); | 412 | security_skb_classify_flow(skb, &fl); |
408 | if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl)) | 413 | if (ip_route_output_key(net, &rt, &fl)) |
409 | goto out_unlock; | 414 | goto out_unlock; |
410 | } | 415 | } |
411 | if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, | 416 | if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, |
@@ -413,7 +418,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
413 | icmp_push_reply(icmp_param, &ipc, rt); | 418 | icmp_push_reply(icmp_param, &ipc, rt); |
414 | ip_rt_put(rt); | 419 | ip_rt_put(rt); |
415 | out_unlock: | 420 | out_unlock: |
416 | icmp_xmit_unlock(); | 421 | icmp_xmit_unlock(sk); |
417 | } | 422 | } |
418 | 423 | ||
419 | 424 | ||
@@ -438,10 +443,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
438 | __be32 saddr; | 443 | __be32 saddr; |
439 | u8 tos; | 444 | u8 tos; |
440 | struct net *net; | 445 | struct net *net; |
446 | struct sock *sk; | ||
441 | 447 | ||
442 | if (!rt) | 448 | if (!rt) |
443 | goto out; | 449 | goto out; |
444 | net = rt->u.dst.dev->nd_net; | 450 | net = rt->u.dst.dev->nd_net; |
451 | sk = icmp_sk(net); | ||
445 | 452 | ||
446 | /* | 453 | /* |
447 | * Find the original header. It is expected to be valid, of course. | 454 | * Find the original header. It is expected to be valid, of course. |
@@ -505,7 +512,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
505 | } | 512 | } |
506 | } | 513 | } |
507 | 514 | ||
508 | if (icmp_xmit_lock()) | 515 | if (icmp_xmit_lock(sk)) |
509 | return; | 516 | return; |
510 | 517 | ||
511 | /* | 518 | /* |
@@ -544,7 +551,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
544 | icmp_param.data.icmph.checksum = 0; | 551 | icmp_param.data.icmph.checksum = 0; |
545 | icmp_param.skb = skb_in; | 552 | icmp_param.skb = skb_in; |
546 | icmp_param.offset = skb_network_offset(skb_in); | 553 | icmp_param.offset = skb_network_offset(skb_in); |
547 | inet_sk(icmp_socket->sk)->tos = tos; | 554 | inet_sk(sk)->tos = tos; |
548 | ipc.addr = iph->saddr; | 555 | ipc.addr = iph->saddr; |
549 | ipc.opt = &icmp_param.replyopts; | 556 | ipc.opt = &icmp_param.replyopts; |
550 | 557 | ||
@@ -652,7 +659,7 @@ route_done: | |||
652 | ende: | 659 | ende: |
653 | ip_rt_put(rt); | 660 | ip_rt_put(rt); |
654 | out_unlock: | 661 | out_unlock: |
655 | icmp_xmit_unlock(); | 662 | icmp_xmit_unlock(sk); |
656 | out:; | 663 | out:; |
657 | } | 664 | } |
658 | 665 | ||
@@ -1139,29 +1146,46 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { | |||
1139 | }, | 1146 | }, |
1140 | }; | 1147 | }; |
1141 | 1148 | ||
1142 | void __init icmp_init(struct net_proto_family *ops) | 1149 | static void __net_exit icmp_sk_exit(struct net *net) |
1143 | { | 1150 | { |
1144 | struct inet_sock *inet; | ||
1145 | int i; | 1151 | int i; |
1146 | 1152 | ||
1147 | for_each_possible_cpu(i) { | 1153 | for_each_possible_cpu(i) |
1148 | int err; | 1154 | sk_release_kernel(net->ipv4.icmp_sk[i]); |
1155 | kfree(net->ipv4.icmp_sk); | ||
1156 | net->ipv4.icmp_sk = NULL; | ||
1157 | } | ||
1149 | 1158 | ||
1150 | err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, | 1159 | int __net_init icmp_sk_init(struct net *net) |
1151 | &per_cpu(__icmp_socket, i)); | 1160 | { |
1161 | int i, err; | ||
1152 | 1162 | ||
1163 | net->ipv4.icmp_sk = | ||
1164 | kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); | ||
1165 | if (net->ipv4.icmp_sk == NULL) | ||
1166 | return -ENOMEM; | ||
1167 | |||
1168 | for_each_possible_cpu(i) { | ||
1169 | struct sock *sk; | ||
1170 | struct socket *sock; | ||
1171 | struct inet_sock *inet; | ||
1172 | |||
1173 | err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock); | ||
1153 | if (err < 0) | 1174 | if (err < 0) |
1154 | panic("Failed to create the ICMP control socket.\n"); | 1175 | goto fail; |
1176 | |||
1177 | net->ipv4.icmp_sk[i] = sk = sock->sk; | ||
1178 | sk_change_net(sk, net); | ||
1155 | 1179 | ||
1156 | per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; | 1180 | sk->sk_allocation = GFP_ATOMIC; |
1157 | 1181 | ||
1158 | /* Enough space for 2 64K ICMP packets, including | 1182 | /* Enough space for 2 64K ICMP packets, including |
1159 | * sk_buff struct overhead. | 1183 | * sk_buff struct overhead. |
1160 | */ | 1184 | */ |
1161 | per_cpu(__icmp_socket, i)->sk->sk_sndbuf = | 1185 | sk->sk_sndbuf = |
1162 | (2 * ((64 * 1024) + sizeof(struct sk_buff))); | 1186 | (2 * ((64 * 1024) + sizeof(struct sk_buff))); |
1163 | 1187 | ||
1164 | inet = inet_sk(per_cpu(__icmp_socket, i)->sk); | 1188 | inet = inet_sk(sk); |
1165 | inet->uc_ttl = -1; | 1189 | inet->uc_ttl = -1; |
1166 | inet->pmtudisc = IP_PMTUDISC_DONT; | 1190 | inet->pmtudisc = IP_PMTUDISC_DONT; |
1167 | 1191 | ||
@@ -1169,8 +1193,25 @@ void __init icmp_init(struct net_proto_family *ops) | |||
1169 | * see it, we do not wish this socket to see incoming | 1193 | * see it, we do not wish this socket to see incoming |
1170 | * packets. | 1194 | * packets. |
1171 | */ | 1195 | */ |
1172 | per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); | 1196 | sk->sk_prot->unhash(sk); |
1173 | } | 1197 | } |
1198 | return 0; | ||
1199 | |||
1200 | fail: | ||
1201 | for_each_possible_cpu(i) | ||
1202 | sk_release_kernel(net->ipv4.icmp_sk[i]); | ||
1203 | kfree(net->ipv4.icmp_sk); | ||
1204 | return err; | ||
1205 | } | ||
1206 | |||
1207 | static struct pernet_operations __net_initdata icmp_sk_ops = { | ||
1208 | .init = icmp_sk_init, | ||
1209 | .exit = icmp_sk_exit, | ||
1210 | }; | ||
1211 | |||
1212 | int __init icmp_init(void) | ||
1213 | { | ||
1214 | return register_pernet_device(&icmp_sk_ops); | ||
1174 | } | 1215 | } |
1175 | 1216 | ||
1176 | EXPORT_SYMBOL(icmp_err_convert); | 1217 | EXPORT_SYMBOL(icmp_err_convert); |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 732cd07e6071..d3f34a772f3b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) | |||
1198 | 1198 | ||
1199 | ASSERT_RTNL(); | 1199 | ASSERT_RTNL(); |
1200 | 1200 | ||
1201 | if (in_dev->dev->nd_net != &init_net) | ||
1202 | return; | ||
1203 | |||
1201 | for (im=in_dev->mc_list; im; im=im->next) { | 1204 | for (im=in_dev->mc_list; im; im=im->next) { |
1202 | if (im->multiaddr == addr) { | 1205 | if (im->multiaddr == addr) { |
1203 | im->users++; | 1206 | im->users++; |
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) | |||
1277 | 1280 | ||
1278 | ASSERT_RTNL(); | 1281 | ASSERT_RTNL(); |
1279 | 1282 | ||
1283 | if (in_dev->dev->nd_net != &init_net) | ||
1284 | return; | ||
1285 | |||
1280 | for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { | 1286 | for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { |
1281 | if (i->multiaddr==addr) { | 1287 | if (i->multiaddr==addr) { |
1282 | if (--i->users == 0) { | 1288 | if (--i->users == 0) { |
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev) | |||
1304 | 1310 | ||
1305 | ASSERT_RTNL(); | 1311 | ASSERT_RTNL(); |
1306 | 1312 | ||
1313 | if (in_dev->dev->nd_net != &init_net) | ||
1314 | return; | ||
1315 | |||
1307 | for (i=in_dev->mc_list; i; i=i->next) | 1316 | for (i=in_dev->mc_list; i; i=i->next) |
1308 | igmp_group_dropped(i); | 1317 | igmp_group_dropped(i); |
1309 | 1318 | ||
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev) | |||
1324 | { | 1333 | { |
1325 | ASSERT_RTNL(); | 1334 | ASSERT_RTNL(); |
1326 | 1335 | ||
1336 | if (in_dev->dev->nd_net != &init_net) | ||
1337 | return; | ||
1338 | |||
1327 | in_dev->mc_tomb = NULL; | 1339 | in_dev->mc_tomb = NULL; |
1328 | #ifdef CONFIG_IP_MULTICAST | 1340 | #ifdef CONFIG_IP_MULTICAST |
1329 | in_dev->mr_gq_running = 0; | 1341 | in_dev->mr_gq_running = 0; |
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev) | |||
1347 | 1359 | ||
1348 | ASSERT_RTNL(); | 1360 | ASSERT_RTNL(); |
1349 | 1361 | ||
1362 | if (in_dev->dev->nd_net != &init_net) | ||
1363 | return; | ||
1364 | |||
1350 | ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); | 1365 | ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); |
1351 | 1366 | ||
1352 | for (i=in_dev->mc_list; i; i=i->next) | 1367 | for (i=in_dev->mc_list; i; i=i->next) |
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev) | |||
1363 | 1378 | ||
1364 | ASSERT_RTNL(); | 1379 | ASSERT_RTNL(); |
1365 | 1380 | ||
1381 | if (in_dev->dev->nd_net != &init_net) | ||
1382 | return; | ||
1383 | |||
1366 | /* Deactivate timers */ | 1384 | /* Deactivate timers */ |
1367 | ip_mc_down(in_dev); | 1385 | ip_mc_down(in_dev); |
1368 | 1386 | ||
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) | |||
1744 | if (!ipv4_is_multicast(addr)) | 1762 | if (!ipv4_is_multicast(addr)) |
1745 | return -EINVAL; | 1763 | return -EINVAL; |
1746 | 1764 | ||
1765 | if (sk->sk_net != &init_net) | ||
1766 | return -EPROTONOSUPPORT; | ||
1767 | |||
1747 | rtnl_lock(); | 1768 | rtnl_lock(); |
1748 | 1769 | ||
1749 | in_dev = ip_mc_find_dev(imr); | 1770 | in_dev = ip_mc_find_dev(imr); |
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) | |||
1812 | u32 ifindex; | 1833 | u32 ifindex; |
1813 | int ret = -EADDRNOTAVAIL; | 1834 | int ret = -EADDRNOTAVAIL; |
1814 | 1835 | ||
1836 | if (sk->sk_net != &init_net) | ||
1837 | return -EPROTONOSUPPORT; | ||
1838 | |||
1815 | rtnl_lock(); | 1839 | rtnl_lock(); |
1816 | in_dev = ip_mc_find_dev(imr); | 1840 | in_dev = ip_mc_find_dev(imr); |
1817 | ifindex = imr->imr_ifindex; | 1841 | ifindex = imr->imr_ifindex; |
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct | |||
1857 | if (!ipv4_is_multicast(addr)) | 1881 | if (!ipv4_is_multicast(addr)) |
1858 | return -EINVAL; | 1882 | return -EINVAL; |
1859 | 1883 | ||
1884 | if (sk->sk_net != &init_net) | ||
1885 | return -EPROTONOSUPPORT; | ||
1886 | |||
1860 | rtnl_lock(); | 1887 | rtnl_lock(); |
1861 | 1888 | ||
1862 | imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; | 1889 | imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; |
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) | |||
1990 | msf->imsf_fmode != MCAST_EXCLUDE) | 2017 | msf->imsf_fmode != MCAST_EXCLUDE) |
1991 | return -EINVAL; | 2018 | return -EINVAL; |
1992 | 2019 | ||
2020 | if (sk->sk_net != &init_net) | ||
2021 | return -EPROTONOSUPPORT; | ||
2022 | |||
1993 | rtnl_lock(); | 2023 | rtnl_lock(); |
1994 | 2024 | ||
1995 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; | 2025 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; |
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, | |||
2070 | if (!ipv4_is_multicast(addr)) | 2100 | if (!ipv4_is_multicast(addr)) |
2071 | return -EINVAL; | 2101 | return -EINVAL; |
2072 | 2102 | ||
2103 | if (sk->sk_net != &init_net) | ||
2104 | return -EPROTONOSUPPORT; | ||
2105 | |||
2073 | rtnl_lock(); | 2106 | rtnl_lock(); |
2074 | 2107 | ||
2075 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; | 2108 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; |
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, | |||
2132 | if (!ipv4_is_multicast(addr)) | 2165 | if (!ipv4_is_multicast(addr)) |
2133 | return -EINVAL; | 2166 | return -EINVAL; |
2134 | 2167 | ||
2168 | if (sk->sk_net != &init_net) | ||
2169 | return -EPROTONOSUPPORT; | ||
2170 | |||
2135 | rtnl_lock(); | 2171 | rtnl_lock(); |
2136 | 2172 | ||
2137 | err = -EADDRNOTAVAIL; | 2173 | err = -EADDRNOTAVAIL; |
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk) | |||
2216 | if (inet->mc_list == NULL) | 2252 | if (inet->mc_list == NULL) |
2217 | return; | 2253 | return; |
2218 | 2254 | ||
2255 | if (sk->sk_net != &init_net) | ||
2256 | return; | ||
2257 | |||
2219 | rtnl_lock(); | 2258 | rtnl_lock(); |
2220 | while ((iml = inet->mc_list) != NULL) { | 2259 | while ((iml = inet->mc_list) != NULL) { |
2221 | struct in_device *in_dev; | 2260 | struct in_device *in_dev; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b189278c7bc1..c0e0fa03fce1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -463,7 +463,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
463 | if (time_after_eq(now, req->expires)) { | 463 | if (time_after_eq(now, req->expires)) { |
464 | if ((req->retrans < thresh || | 464 | if ((req->retrans < thresh || |
465 | (inet_rsk(req)->acked && req->retrans < max_retries)) | 465 | (inet_rsk(req)->acked && req->retrans < max_retries)) |
466 | && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { | 466 | && !req->rsk_ops->rtx_syn_ack(parent, req)) { |
467 | unsigned long timeo; | 467 | unsigned long timeo; |
468 | 468 | ||
469 | if (req->retrans++ == 0) | 469 | if (req->retrans++ == 0) |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 4d315158fd3c..baaedd9689a0 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -107,10 +107,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
107 | sptr = skb_network_header(skb); | 107 | sptr = skb_network_header(skb); |
108 | dptr = dopt->__data; | 108 | dptr = dopt->__data; |
109 | 109 | ||
110 | if (skb->dst) | 110 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; |
111 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; | ||
112 | else | ||
113 | daddr = ip_hdr(skb)->daddr; | ||
114 | 111 | ||
115 | if (sopt->rr) { | 112 | if (sopt->rr) { |
116 | optlen = sptr[sopt->rr+1]; | 113 | optlen = sptr[sopt->rr+1]; |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 5dd938579eeb..4afce0572806 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -291,7 +291,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) | |||
291 | 291 | ||
292 | mm_segment_t oldfs = get_fs(); | 292 | mm_segment_t oldfs = get_fs(); |
293 | set_fs(get_ds()); | 293 | set_fs(get_ds()); |
294 | res = devinet_ioctl(cmd, (struct ifreq __user *) arg); | 294 | res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); |
295 | set_fs(oldfs); | 295 | set_fs(oldfs); |
296 | return res; | 296 | return res; |
297 | } | 297 | } |
@@ -459,10 +459,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt | |||
459 | if (rarp->ar_pro != htons(ETH_P_IP)) | 459 | if (rarp->ar_pro != htons(ETH_P_IP)) |
460 | goto drop; | 460 | goto drop; |
461 | 461 | ||
462 | if (!pskb_may_pull(skb, | 462 | if (!pskb_may_pull(skb, arp_hdr_len(dev))) |
463 | sizeof(struct arphdr) + | ||
464 | (2 * dev->addr_len) + | ||
465 | (2 * 4))) | ||
466 | goto drop; | 463 | goto drop; |
467 | 464 | ||
468 | /* OK, it is all there and looks valid, process... */ | 465 | /* OK, it is all there and looks valid, process... */ |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a7591ce344d2..9b5904486184 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -233,10 +233,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
233 | void *table_base; | 233 | void *table_base; |
234 | struct xt_table_info *private; | 234 | struct xt_table_info *private; |
235 | 235 | ||
236 | /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ | 236 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) |
237 | if (!pskb_may_pull(skb, (sizeof(struct arphdr) + | ||
238 | (2 * skb->dev->addr_len) + | ||
239 | (2 * sizeof(u32))))) | ||
240 | return NF_DROP; | 237 | return NF_DROP; |
241 | 238 | ||
242 | indev = in ? in->name : nulldevname; | 239 | indev = in ? in->name : nulldevname; |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d80fee8327e4..313b3fcf387e 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this, | |||
139 | unsigned long event, | 139 | unsigned long event, |
140 | void *ptr) | 140 | void *ptr) |
141 | { | 141 | { |
142 | const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; | 142 | struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; |
143 | 143 | return masq_device_event(this, event, dev); | |
144 | if (event == NETDEV_DOWN) { | ||
145 | /* IP address was deleted. Search entire table for | ||
146 | conntracks which were associated with that device, | ||
147 | and forget them. */ | ||
148 | NF_CT_ASSERT(dev->ifindex != 0); | ||
149 | |||
150 | nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); | ||
151 | } | ||
152 | |||
153 | return NOTIFY_DONE; | ||
154 | } | 144 | } |
155 | 145 | ||
156 | static struct notifier_block masq_dev_notifier = { | 146 | static struct notifier_block masq_dev_notifier = { |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 089252e82c01..9668c3a23efe 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -379,7 +379,7 @@ static const struct file_operations ct_cpu_seq_fops = { | |||
379 | .open = ct_cpu_seq_open, | 379 | .open = ct_cpu_seq_open, |
380 | .read = seq_read, | 380 | .read = seq_read, |
381 | .llseek = seq_lseek, | 381 | .llseek = seq_lseek, |
382 | .release = seq_release_private, | 382 | .release = seq_release, |
383 | }; | 383 | }; |
384 | 384 | ||
385 | int __init nf_conntrack_ipv4_compat_init(void) | 385 | int __init nf_conntrack_ipv4_compat_init(void) |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d63474c6b400..d75ddb7fa4b8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -59,7 +59,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
59 | atomic_read(&tcp_memory_allocated)); | 59 | atomic_read(&tcp_memory_allocated)); |
60 | seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), | 60 | seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), |
61 | atomic_read(&udp_memory_allocated)); | 61 | atomic_read(&udp_memory_allocated)); |
62 | #ifdef CONFIG_IP_UDPLITE | ||
62 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); | 63 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); |
64 | #endif | ||
63 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); | 65 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); |
64 | seq_printf(seq, "FRAG: inuse %d memory %d\n", | 66 | seq_printf(seq, "FRAG: inuse %d memory %d\n", |
65 | ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); | 67 | ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); |
@@ -349,6 +351,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
349 | snmp_fold_field((void **)udp_statistics, | 351 | snmp_fold_field((void **)udp_statistics, |
350 | snmp4_udp_list[i].entry)); | 352 | snmp4_udp_list[i].entry)); |
351 | 353 | ||
354 | #ifdef CONFIG_IP_UDPLITE | ||
352 | /* the UDP and UDP-Lite MIBs are the same */ | 355 | /* the UDP and UDP-Lite MIBs are the same */ |
353 | seq_puts(seq, "\nUdpLite:"); | 356 | seq_puts(seq, "\nUdpLite:"); |
354 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 357 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
@@ -359,7 +362,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
359 | seq_printf(seq, " %lu", | 362 | seq_printf(seq, " %lu", |
360 | snmp_fold_field((void **)udplite_statistics, | 363 | snmp_fold_field((void **)udplite_statistics, |
361 | snmp4_udp_list[i].entry)); | 364 | snmp4_udp_list[i].entry)); |
362 | 365 | #endif | |
363 | seq_putc(seq, '\n'); | 366 | seq_putc(seq, '\n'); |
364 | return 0; | 367 | return 0; |
365 | } | 368 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7b5e8e1d94be..8c3e165f0034 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -273,6 +273,7 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) | |||
273 | 273 | ||
274 | #ifdef CONFIG_PROC_FS | 274 | #ifdef CONFIG_PROC_FS |
275 | struct rt_cache_iter_state { | 275 | struct rt_cache_iter_state { |
276 | struct seq_net_private p; | ||
276 | int bucket; | 277 | int bucket; |
277 | int genid; | 278 | int genid; |
278 | }; | 279 | }; |
@@ -285,7 +286,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) | |||
285 | rcu_read_lock_bh(); | 286 | rcu_read_lock_bh(); |
286 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 287 | r = rcu_dereference(rt_hash_table[st->bucket].chain); |
287 | while (r) { | 288 | while (r) { |
288 | if (r->rt_genid == st->genid) | 289 | if (r->u.dst.dev->nd_net == st->p.net && |
290 | r->rt_genid == st->genid) | ||
289 | return r; | 291 | return r; |
290 | r = rcu_dereference(r->u.dst.rt_next); | 292 | r = rcu_dereference(r->u.dst.rt_next); |
291 | } | 293 | } |
@@ -294,7 +296,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) | |||
294 | return r; | 296 | return r; |
295 | } | 297 | } |
296 | 298 | ||
297 | static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) | 299 | static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st, |
300 | struct rtable *r) | ||
298 | { | 301 | { |
299 | r = r->u.dst.rt_next; | 302 | r = r->u.dst.rt_next; |
300 | while (!r) { | 303 | while (!r) { |
@@ -307,16 +310,25 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r | |||
307 | return rcu_dereference(r); | 310 | return rcu_dereference(r); |
308 | } | 311 | } |
309 | 312 | ||
313 | static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, | ||
314 | struct rtable *r) | ||
315 | { | ||
316 | while ((r = __rt_cache_get_next(st, r)) != NULL) { | ||
317 | if (r->u.dst.dev->nd_net != st->p.net) | ||
318 | continue; | ||
319 | if (r->rt_genid == st->genid) | ||
320 | break; | ||
321 | } | ||
322 | return r; | ||
323 | } | ||
324 | |||
310 | static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) | 325 | static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) |
311 | { | 326 | { |
312 | struct rtable *r = rt_cache_get_first(st); | 327 | struct rtable *r = rt_cache_get_first(st); |
313 | 328 | ||
314 | if (r) | 329 | if (r) |
315 | while (pos && (r = rt_cache_get_next(st, r))) { | 330 | while (pos && (r = rt_cache_get_next(st, r))) |
316 | if (r->rt_genid != st->genid) | ||
317 | continue; | ||
318 | --pos; | 331 | --pos; |
319 | } | ||
320 | return pos ? NULL : r; | 332 | return pos ? NULL : r; |
321 | } | 333 | } |
322 | 334 | ||
@@ -390,7 +402,7 @@ static const struct seq_operations rt_cache_seq_ops = { | |||
390 | 402 | ||
391 | static int rt_cache_seq_open(struct inode *inode, struct file *file) | 403 | static int rt_cache_seq_open(struct inode *inode, struct file *file) |
392 | { | 404 | { |
393 | return seq_open_private(file, &rt_cache_seq_ops, | 405 | return seq_open_net(inode, file, &rt_cache_seq_ops, |
394 | sizeof(struct rt_cache_iter_state)); | 406 | sizeof(struct rt_cache_iter_state)); |
395 | } | 407 | } |
396 | 408 | ||
@@ -399,7 +411,7 @@ static const struct file_operations rt_cache_seq_fops = { | |||
399 | .open = rt_cache_seq_open, | 411 | .open = rt_cache_seq_open, |
400 | .read = seq_read, | 412 | .read = seq_read, |
401 | .llseek = seq_lseek, | 413 | .llseek = seq_lseek, |
402 | .release = seq_release_private, | 414 | .release = seq_release_net, |
403 | }; | 415 | }; |
404 | 416 | ||
405 | 417 | ||
@@ -533,7 +545,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, | |||
533 | } | 545 | } |
534 | #endif | 546 | #endif |
535 | 547 | ||
536 | static __init int ip_rt_proc_init(struct net *net) | 548 | static int __net_init ip_rt_do_proc_init(struct net *net) |
537 | { | 549 | { |
538 | struct proc_dir_entry *pde; | 550 | struct proc_dir_entry *pde; |
539 | 551 | ||
@@ -564,8 +576,26 @@ err2: | |||
564 | err1: | 576 | err1: |
565 | return -ENOMEM; | 577 | return -ENOMEM; |
566 | } | 578 | } |
579 | |||
580 | static void __net_exit ip_rt_do_proc_exit(struct net *net) | ||
581 | { | ||
582 | remove_proc_entry("rt_cache", net->proc_net_stat); | ||
583 | remove_proc_entry("rt_cache", net->proc_net); | ||
584 | remove_proc_entry("rt_acct", net->proc_net); | ||
585 | } | ||
586 | |||
587 | static struct pernet_operations ip_rt_proc_ops __net_initdata = { | ||
588 | .init = ip_rt_do_proc_init, | ||
589 | .exit = ip_rt_do_proc_exit, | ||
590 | }; | ||
591 | |||
592 | static int __init ip_rt_proc_init(void) | ||
593 | { | ||
594 | return register_pernet_subsys(&ip_rt_proc_ops); | ||
595 | } | ||
596 | |||
567 | #else | 597 | #else |
568 | static inline int ip_rt_proc_init(struct net *net) | 598 | static inline int ip_rt_proc_init(void) |
569 | { | 599 | { |
570 | return 0; | 600 | return 0; |
571 | } | 601 | } |
@@ -1131,10 +1161,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1131 | __be32 skeys[2] = { saddr, 0 }; | 1161 | __be32 skeys[2] = { saddr, 0 }; |
1132 | int ikeys[2] = { dev->ifindex, 0 }; | 1162 | int ikeys[2] = { dev->ifindex, 0 }; |
1133 | struct netevent_redirect netevent; | 1163 | struct netevent_redirect netevent; |
1164 | struct net *net; | ||
1134 | 1165 | ||
1135 | if (!in_dev) | 1166 | if (!in_dev) |
1136 | return; | 1167 | return; |
1137 | 1168 | ||
1169 | net = dev->nd_net; | ||
1138 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) | 1170 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) |
1139 | || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) | 1171 | || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) |
1140 | || ipv4_is_zeronet(new_gw)) | 1172 | || ipv4_is_zeronet(new_gw)) |
@@ -1146,7 +1178,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1146 | if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) | 1178 | if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) |
1147 | goto reject_redirect; | 1179 | goto reject_redirect; |
1148 | } else { | 1180 | } else { |
1149 | if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST) | 1181 | if (inet_addr_type(net, new_gw) != RTN_UNICAST) |
1150 | goto reject_redirect; | 1182 | goto reject_redirect; |
1151 | } | 1183 | } |
1152 | 1184 | ||
@@ -1164,7 +1196,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1164 | rth->fl.fl4_src != skeys[i] || | 1196 | rth->fl.fl4_src != skeys[i] || |
1165 | rth->fl.oif != ikeys[k] || | 1197 | rth->fl.oif != ikeys[k] || |
1166 | rth->fl.iif != 0 || | 1198 | rth->fl.iif != 0 || |
1167 | rth->rt_genid != atomic_read(&rt_genid)) { | 1199 | rth->rt_genid != atomic_read(&rt_genid) || |
1200 | rth->u.dst.dev->nd_net != net) { | ||
1168 | rthp = &rth->u.dst.rt_next; | 1201 | rthp = &rth->u.dst.rt_next; |
1169 | continue; | 1202 | continue; |
1170 | } | 1203 | } |
@@ -2668,9 +2701,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2668 | int err; | 2701 | int err; |
2669 | struct sk_buff *skb; | 2702 | struct sk_buff *skb; |
2670 | 2703 | ||
2671 | if (net != &init_net) | ||
2672 | return -EINVAL; | ||
2673 | |||
2674 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); | 2704 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); |
2675 | if (err < 0) | 2705 | if (err < 0) |
2676 | goto errout; | 2706 | goto errout; |
@@ -2700,7 +2730,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2700 | if (iif) { | 2730 | if (iif) { |
2701 | struct net_device *dev; | 2731 | struct net_device *dev; |
2702 | 2732 | ||
2703 | dev = __dev_get_by_index(&init_net, iif); | 2733 | dev = __dev_get_by_index(net, iif); |
2704 | if (dev == NULL) { | 2734 | if (dev == NULL) { |
2705 | err = -ENODEV; | 2735 | err = -ENODEV; |
2706 | goto errout_free; | 2736 | goto errout_free; |
@@ -2726,7 +2756,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2726 | }, | 2756 | }, |
2727 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2757 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2728 | }; | 2758 | }; |
2729 | err = ip_route_output_key(&init_net, &rt, &fl); | 2759 | err = ip_route_output_key(net, &rt, &fl); |
2730 | } | 2760 | } |
2731 | 2761 | ||
2732 | if (err) | 2762 | if (err) |
@@ -2737,11 +2767,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2737 | rt->rt_flags |= RTCF_NOTIFY; | 2767 | rt->rt_flags |= RTCF_NOTIFY; |
2738 | 2768 | ||
2739 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 2769 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, |
2740 | RTM_NEWROUTE, 0, 0); | 2770 | RTM_NEWROUTE, 0, 0); |
2741 | if (err <= 0) | 2771 | if (err <= 0) |
2742 | goto errout_free; | 2772 | goto errout_free; |
2743 | 2773 | ||
2744 | err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); | 2774 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); |
2745 | errout: | 2775 | errout: |
2746 | return err; | 2776 | return err; |
2747 | 2777 | ||
@@ -2755,6 +2785,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2755 | struct rtable *rt; | 2785 | struct rtable *rt; |
2756 | int h, s_h; | 2786 | int h, s_h; |
2757 | int idx, s_idx; | 2787 | int idx, s_idx; |
2788 | struct net *net; | ||
2789 | |||
2790 | net = skb->sk->sk_net; | ||
2758 | 2791 | ||
2759 | s_h = cb->args[0]; | 2792 | s_h = cb->args[0]; |
2760 | if (s_h < 0) | 2793 | if (s_h < 0) |
@@ -2764,7 +2797,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2764 | rcu_read_lock_bh(); | 2797 | rcu_read_lock_bh(); |
2765 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 2798 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; |
2766 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 2799 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { |
2767 | if (idx < s_idx) | 2800 | if (rt->u.dst.dev->nd_net != net || idx < s_idx) |
2768 | continue; | 2801 | continue; |
2769 | if (rt->rt_genid != atomic_read(&rt_genid)) | 2802 | if (rt->rt_genid != atomic_read(&rt_genid)) |
2770 | continue; | 2803 | continue; |
@@ -3040,7 +3073,7 @@ int __init ip_rt_init(void) | |||
3040 | ip_rt_secret_interval; | 3073 | ip_rt_secret_interval; |
3041 | add_timer(&rt_secret_timer); | 3074 | add_timer(&rt_secret_timer); |
3042 | 3075 | ||
3043 | if (ip_rt_proc_init(&init_net)) | 3076 | if (ip_rt_proc_init()) |
3044 | printk(KERN_ERR "Unable to create route proc files\n"); | 3077 | printk(KERN_ERR "Unable to create route proc files\n"); |
3045 | #ifdef CONFIG_XFRM | 3078 | #ifdef CONFIG_XFRM |
3046 | xfrm_init(); | 3079 | xfrm_init(); |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f470fe4511db..4704f27f6c0b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -10,8 +10,6 @@ | |||
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ | 12 | * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ |
13 | * | ||
14 | * Missing: IPv6 support. | ||
15 | */ | 13 | */ |
16 | 14 | ||
17 | #include <linux/tcp.h> | 15 | #include <linux/tcp.h> |
@@ -23,22 +21,25 @@ | |||
23 | 21 | ||
24 | extern int sysctl_tcp_syncookies; | 22 | extern int sysctl_tcp_syncookies; |
25 | 23 | ||
26 | static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; | 24 | __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; |
25 | EXPORT_SYMBOL(syncookie_secret); | ||
27 | 26 | ||
28 | static __init int init_syncookies(void) | 27 | static __init int init_syncookies(void) |
29 | { | 28 | { |
30 | get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); | 29 | get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); |
31 | return 0; | 30 | return 0; |
32 | } | 31 | } |
33 | module_init(init_syncookies); | 32 | __initcall(init_syncookies); |
34 | 33 | ||
35 | #define COOKIEBITS 24 /* Upper bits store count */ | 34 | #define COOKIEBITS 24 /* Upper bits store count */ |
36 | #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) | 35 | #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) |
37 | 36 | ||
37 | static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS]; | ||
38 | |||
38 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | 39 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, |
39 | u32 count, int c) | 40 | u32 count, int c) |
40 | { | 41 | { |
41 | __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS]; | 42 | __u32 *tmp = __get_cpu_var(cookie_scratch); |
42 | 43 | ||
43 | memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); | 44 | memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); |
44 | tmp[0] = (__force u32)saddr; | 45 | tmp[0] = (__force u32)saddr; |
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 3aa0b23c1ea0..eb5b9854c8c7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -1,12 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.1 | 2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 |
3 | * | 3 | * Home page: |
4 | * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC | ||
4 | * This is from the implementation of CUBIC TCP in | 5 | * This is from the implementation of CUBIC TCP in |
5 | * Injong Rhee, Lisong Xu. | 6 | * Injong Rhee, Lisong Xu. |
6 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant | 7 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant |
7 | * in PFLDnet 2005 | 8 | * in PFLDnet 2005 |
8 | * Available from: | 9 | * Available from: |
9 | * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf | 10 | * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf |
10 | * | 11 | * |
11 | * Unless CUBIC is enabled and congestion window is large | 12 | * Unless CUBIC is enabled and congestion window is large |
12 | * this behaves the same as the original Reno. | 13 | * this behaves the same as the original Reno. |
@@ -20,15 +21,10 @@ | |||
20 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation | 21 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation |
21 | * max_cwnd = snd_cwnd * beta | 22 | * max_cwnd = snd_cwnd * beta |
22 | */ | 23 | */ |
23 | #define BICTCP_B 4 /* | ||
24 | * In binary search, | ||
25 | * go to point (max+min)/N | ||
26 | */ | ||
27 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ | 24 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ |
28 | 25 | ||
29 | static int fast_convergence __read_mostly = 1; | 26 | static int fast_convergence __read_mostly = 1; |
30 | static int max_increment __read_mostly = 16; | 27 | static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ |
31 | static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ | ||
32 | static int initial_ssthresh __read_mostly; | 28 | static int initial_ssthresh __read_mostly; |
33 | static int bic_scale __read_mostly = 41; | 29 | static int bic_scale __read_mostly = 41; |
34 | static int tcp_friendliness __read_mostly = 1; | 30 | static int tcp_friendliness __read_mostly = 1; |
@@ -40,9 +36,7 @@ static u64 cube_factor __read_mostly; | |||
40 | /* Note parameters that are used for precomputing scale factors are read-only */ | 36 | /* Note parameters that are used for precomputing scale factors are read-only */ |
41 | module_param(fast_convergence, int, 0644); | 37 | module_param(fast_convergence, int, 0644); |
42 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); | 38 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); |
43 | module_param(max_increment, int, 0644); | 39 | module_param(beta, int, 0644); |
44 | MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); | ||
45 | module_param(beta, int, 0444); | ||
46 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); | 40 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); |
47 | module_param(initial_ssthresh, int, 0644); | 41 | module_param(initial_ssthresh, int, 0644); |
48 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); | 42 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); |
@@ -145,7 +139,7 @@ static u32 cubic_root(u64 a) | |||
145 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | 139 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) |
146 | { | 140 | { |
147 | u64 offs; | 141 | u64 offs; |
148 | u32 delta, t, bic_target, min_cnt, max_cnt; | 142 | u32 delta, t, bic_target, max_cnt; |
149 | 143 | ||
150 | ca->ack_cnt++; /* count the number of ACKs */ | 144 | ca->ack_cnt++; /* count the number of ACKs */ |
151 | 145 | ||
@@ -211,19 +205,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
211 | ca->cnt = 100 * cwnd; /* very small increment*/ | 205 | ca->cnt = 100 * cwnd; /* very small increment*/ |
212 | } | 206 | } |
213 | 207 | ||
214 | if (ca->delay_min > 0) { | ||
215 | /* max increment = Smax * rtt / 0.1 */ | ||
216 | min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); | ||
217 | |||
218 | /* use concave growth when the target is above the origin */ | ||
219 | if (ca->cnt < min_cnt && t >= ca->bic_K) | ||
220 | ca->cnt = min_cnt; | ||
221 | } | ||
222 | |||
223 | /* slow start and low utilization */ | ||
224 | if (ca->loss_cwnd == 0) /* could be aggressive in slow start */ | ||
225 | ca->cnt = 50; | ||
226 | |||
227 | /* TCP Friendly */ | 208 | /* TCP Friendly */ |
228 | if (tcp_friendliness) { | 209 | if (tcp_friendliness) { |
229 | u32 scale = beta_scale; | 210 | u32 scale = beta_scale; |
@@ -391,4 +372,4 @@ module_exit(cubictcp_unregister); | |||
391 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); | 372 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
392 | MODULE_LICENSE("GPL"); | 373 | MODULE_LICENSE("GPL"); |
393 | MODULE_DESCRIPTION("CUBIC TCP"); | 374 | MODULE_DESCRIPTION("CUBIC TCP"); |
394 | MODULE_VERSION("2.1"); | 375 | MODULE_VERSION("2.2"); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7facdb0f6960..c4679f343675 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -5330,6 +5330,7 @@ discard: | |||
5330 | 5330 | ||
5331 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 5331 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
5332 | EXPORT_SYMBOL(sysctl_tcp_reordering); | 5332 | EXPORT_SYMBOL(sysctl_tcp_reordering); |
5333 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
5333 | EXPORT_SYMBOL(tcp_parse_options); | 5334 | EXPORT_SYMBOL(tcp_parse_options); |
5334 | EXPORT_SYMBOL(tcp_rcv_established); | 5335 | EXPORT_SYMBOL(tcp_rcv_established); |
5335 | EXPORT_SYMBOL(tcp_rcv_state_process); | 5336 | EXPORT_SYMBOL(tcp_rcv_state_process); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 00156bf421ca..3873c4dbeaeb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -723,8 +723,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, | |||
723 | * This still operates on a request_sock only, not on a big | 723 | * This still operates on a request_sock only, not on a big |
724 | * socket. | 724 | * socket. |
725 | */ | 725 | */ |
726 | static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | 726 | static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, |
727 | struct dst_entry *dst) | 727 | struct dst_entry *dst) |
728 | { | 728 | { |
729 | const struct inet_request_sock *ireq = inet_rsk(req); | 729 | const struct inet_request_sock *ireq = inet_rsk(req); |
730 | int err = -1; | 730 | int err = -1; |
@@ -732,7 +732,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
732 | 732 | ||
733 | /* First, grab a route. */ | 733 | /* First, grab a route. */ |
734 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | 734 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
735 | goto out; | 735 | return -1; |
736 | 736 | ||
737 | skb = tcp_make_synack(sk, dst, req); | 737 | skb = tcp_make_synack(sk, dst, req); |
738 | 738 | ||
@@ -751,11 +751,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
751 | err = net_xmit_eval(err); | 751 | err = net_xmit_eval(err); |
752 | } | 752 | } |
753 | 753 | ||
754 | out: | ||
755 | dst_release(dst); | 754 | dst_release(dst); |
756 | return err; | 755 | return err; |
757 | } | 756 | } |
758 | 757 | ||
758 | static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) | ||
759 | { | ||
760 | return __tcp_v4_send_synack(sk, req, NULL); | ||
761 | } | ||
762 | |||
759 | /* | 763 | /* |
760 | * IPv4 request_sock destructor. | 764 | * IPv4 request_sock destructor. |
761 | */ | 765 | */ |
@@ -1351,8 +1355,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1351 | (s32)(peer->tcp_ts - req->ts_recent) > | 1355 | (s32)(peer->tcp_ts - req->ts_recent) > |
1352 | TCP_PAWS_WINDOW) { | 1356 | TCP_PAWS_WINDOW) { |
1353 | NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); | 1357 | NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); |
1354 | dst_release(dst); | 1358 | goto drop_and_release; |
1355 | goto drop_and_free; | ||
1356 | } | 1359 | } |
1357 | } | 1360 | } |
1358 | /* Kill the following clause, if you dislike this way. */ | 1361 | /* Kill the following clause, if you dislike this way. */ |
@@ -1372,24 +1375,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1372 | "request from %u.%u.%u.%u/%u\n", | 1375 | "request from %u.%u.%u.%u/%u\n", |
1373 | NIPQUAD(saddr), | 1376 | NIPQUAD(saddr), |
1374 | ntohs(tcp_hdr(skb)->source)); | 1377 | ntohs(tcp_hdr(skb)->source)); |
1375 | dst_release(dst); | 1378 | goto drop_and_release; |
1376 | goto drop_and_free; | ||
1377 | } | 1379 | } |
1378 | 1380 | ||
1379 | isn = tcp_v4_init_sequence(skb); | 1381 | isn = tcp_v4_init_sequence(skb); |
1380 | } | 1382 | } |
1381 | tcp_rsk(req)->snt_isn = isn; | 1383 | tcp_rsk(req)->snt_isn = isn; |
1382 | 1384 | ||
1383 | if (tcp_v4_send_synack(sk, req, dst)) | 1385 | if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) |
1384 | goto drop_and_free; | 1386 | goto drop_and_free; |
1385 | 1387 | ||
1386 | if (want_cookie) { | 1388 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1387 | reqsk_free(req); | ||
1388 | } else { | ||
1389 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | ||
1390 | } | ||
1391 | return 0; | 1389 | return 0; |
1392 | 1390 | ||
1391 | drop_and_release: | ||
1392 | dst_release(dst); | ||
1393 | drop_and_free: | 1393 | drop_and_free: |
1394 | reqsk_free(req); | 1394 | reqsk_free(req); |
1395 | drop: | 1395 | drop: |
@@ -2443,7 +2443,7 @@ struct proto tcp_prot = { | |||
2443 | REF_PROTO_INUSE(tcp) | 2443 | REF_PROTO_INUSE(tcp) |
2444 | }; | 2444 | }; |
2445 | 2445 | ||
2446 | void __init tcp_v4_init(struct net_proto_family *ops) | 2446 | void __init tcp_v4_init(void) |
2447 | { | 2447 | { |
2448 | if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, | 2448 | if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, |
2449 | IPPROTO_TCP) < 0) | 2449 | IPPROTO_TCP) < 0) |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b61b76847ad9..8245247a6ceb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; | 37 | int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; |
38 | EXPORT_SYMBOL(sysctl_tcp_syncookies); | ||
39 | |||
38 | int sysctl_tcp_abort_on_overflow __read_mostly; | 40 | int sysctl_tcp_abort_on_overflow __read_mostly; |
39 | 41 | ||
40 | struct inet_timewait_death_row tcp_death_row = { | 42 | struct inet_timewait_death_row tcp_death_row = { |
@@ -536,7 +538,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
536 | * Enforce "SYN-ACK" according to figure 8, figure 6 | 538 | * Enforce "SYN-ACK" according to figure 8, figure 6 |
537 | * of RFC793, fixed by RFC1122. | 539 | * of RFC793, fixed by RFC1122. |
538 | */ | 540 | */ |
539 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | 541 | req->rsk_ops->rtx_syn_ack(sk, req); |
540 | return NULL; | 542 | return NULL; |
541 | } | 543 | } |
542 | 544 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ed750f9ceb07..cbfef8b1f5e8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -2560,6 +2560,7 @@ void tcp_send_probe0(struct sock *sk) | |||
2560 | } | 2560 | } |
2561 | } | 2561 | } |
2562 | 2562 | ||
2563 | EXPORT_SYMBOL(tcp_select_initial_window); | ||
2563 | EXPORT_SYMBOL(tcp_connect); | 2564 | EXPORT_SYMBOL(tcp_connect); |
2564 | EXPORT_SYMBOL(tcp_make_synack); | 2565 | EXPORT_SYMBOL(tcp_make_synack); |
2565 | EXPORT_SYMBOL(tcp_simple_retransmit); | 2566 | EXPORT_SYMBOL(tcp_simple_retransmit); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7ea1b67b6de1..c53d7673b57d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -246,553 +246,6 @@ int udp_get_port(struct sock *sk, unsigned short snum, | |||
246 | return __udp_lib_get_port(sk, snum, udp_hash, scmp); | 246 | return __udp_lib_get_port(sk, snum, udp_hash, scmp); |
247 | } | 247 | } |
248 | 248 | ||
249 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
250 | { | ||
251 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | ||
252 | |||
253 | return ( !ipv6_only_sock(sk2) && | ||
254 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | ||
255 | inet1->rcv_saddr == inet2->rcv_saddr )); | ||
256 | } | ||
257 | |||
258 | static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | ||
259 | { | ||
260 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | ||
261 | } | ||
262 | |||
263 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | ||
264 | * harder than this. -DaveM | ||
265 | */ | ||
266 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | ||
267 | __be16 sport, __be32 daddr, __be16 dport, | ||
268 | int dif, struct hlist_head udptable[]) | ||
269 | { | ||
270 | struct sock *sk, *result = NULL; | ||
271 | struct hlist_node *node; | ||
272 | unsigned short hnum = ntohs(dport); | ||
273 | int badness = -1; | ||
274 | |||
275 | read_lock(&udp_hash_lock); | ||
276 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | ||
277 | struct inet_sock *inet = inet_sk(sk); | ||
278 | |||
279 | if (sk->sk_net == net && sk->sk_hash == hnum && | ||
280 | !ipv6_only_sock(sk)) { | ||
281 | int score = (sk->sk_family == PF_INET ? 1 : 0); | ||
282 | if (inet->rcv_saddr) { | ||
283 | if (inet->rcv_saddr != daddr) | ||
284 | continue; | ||
285 | score+=2; | ||
286 | } | ||
287 | if (inet->daddr) { | ||
288 | if (inet->daddr != saddr) | ||
289 | continue; | ||
290 | score+=2; | ||
291 | } | ||
292 | if (inet->dport) { | ||
293 | if (inet->dport != sport) | ||
294 | continue; | ||
295 | score+=2; | ||
296 | } | ||
297 | if (sk->sk_bound_dev_if) { | ||
298 | if (sk->sk_bound_dev_if != dif) | ||
299 | continue; | ||
300 | score+=2; | ||
301 | } | ||
302 | if (score == 9) { | ||
303 | result = sk; | ||
304 | break; | ||
305 | } else if (score > badness) { | ||
306 | result = sk; | ||
307 | badness = score; | ||
308 | } | ||
309 | } | ||
310 | } | ||
311 | if (result) | ||
312 | sock_hold(result); | ||
313 | read_unlock(&udp_hash_lock); | ||
314 | return result; | ||
315 | } | ||
316 | |||
317 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | ||
318 | __be16 loc_port, __be32 loc_addr, | ||
319 | __be16 rmt_port, __be32 rmt_addr, | ||
320 | int dif) | ||
321 | { | ||
322 | struct hlist_node *node; | ||
323 | struct sock *s = sk; | ||
324 | unsigned short hnum = ntohs(loc_port); | ||
325 | |||
326 | sk_for_each_from(s, node) { | ||
327 | struct inet_sock *inet = inet_sk(s); | ||
328 | |||
329 | if (s->sk_hash != hnum || | ||
330 | (inet->daddr && inet->daddr != rmt_addr) || | ||
331 | (inet->dport != rmt_port && inet->dport) || | ||
332 | (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || | ||
333 | ipv6_only_sock(s) || | ||
334 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) | ||
335 | continue; | ||
336 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) | ||
337 | continue; | ||
338 | goto found; | ||
339 | } | ||
340 | s = NULL; | ||
341 | found: | ||
342 | return s; | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * This routine is called by the ICMP module when it gets some | ||
347 | * sort of error condition. If err < 0 then the socket should | ||
348 | * be closed and the error returned to the user. If err > 0 | ||
349 | * it's just the icmp type << 8 | icmp code. | ||
350 | * Header points to the ip header of the error packet. We move | ||
351 | * on past this. Then (as it used to claim before adjustment) | ||
352 | * header points to the first 8 bytes of the udp header. We need | ||
353 | * to find the appropriate port. | ||
354 | */ | ||
355 | |||
356 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | ||
357 | { | ||
358 | struct inet_sock *inet; | ||
359 | struct iphdr *iph = (struct iphdr*)skb->data; | ||
360 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | ||
361 | const int type = icmp_hdr(skb)->type; | ||
362 | const int code = icmp_hdr(skb)->code; | ||
363 | struct sock *sk; | ||
364 | int harderr; | ||
365 | int err; | ||
366 | |||
367 | sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, | ||
368 | iph->saddr, uh->source, skb->dev->ifindex, udptable); | ||
369 | if (sk == NULL) { | ||
370 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
371 | return; /* No socket for error */ | ||
372 | } | ||
373 | |||
374 | err = 0; | ||
375 | harderr = 0; | ||
376 | inet = inet_sk(sk); | ||
377 | |||
378 | switch (type) { | ||
379 | default: | ||
380 | case ICMP_TIME_EXCEEDED: | ||
381 | err = EHOSTUNREACH; | ||
382 | break; | ||
383 | case ICMP_SOURCE_QUENCH: | ||
384 | goto out; | ||
385 | case ICMP_PARAMETERPROB: | ||
386 | err = EPROTO; | ||
387 | harderr = 1; | ||
388 | break; | ||
389 | case ICMP_DEST_UNREACH: | ||
390 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | ||
391 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { | ||
392 | err = EMSGSIZE; | ||
393 | harderr = 1; | ||
394 | break; | ||
395 | } | ||
396 | goto out; | ||
397 | } | ||
398 | err = EHOSTUNREACH; | ||
399 | if (code <= NR_ICMP_UNREACH) { | ||
400 | harderr = icmp_err_convert[code].fatal; | ||
401 | err = icmp_err_convert[code].errno; | ||
402 | } | ||
403 | break; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * RFC1122: OK. Passes ICMP errors back to application, as per | ||
408 | * 4.1.3.3. | ||
409 | */ | ||
410 | if (!inet->recverr) { | ||
411 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | ||
412 | goto out; | ||
413 | } else { | ||
414 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); | ||
415 | } | ||
416 | sk->sk_err = err; | ||
417 | sk->sk_error_report(sk); | ||
418 | out: | ||
419 | sock_put(sk); | ||
420 | } | ||
421 | |||
422 | void udp_err(struct sk_buff *skb, u32 info) | ||
423 | { | ||
424 | __udp4_lib_err(skb, info, udp_hash); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Throw away all pending data and cancel the corking. Socket is locked. | ||
429 | */ | ||
430 | static void udp_flush_pending_frames(struct sock *sk) | ||
431 | { | ||
432 | struct udp_sock *up = udp_sk(sk); | ||
433 | |||
434 | if (up->pending) { | ||
435 | up->len = 0; | ||
436 | up->pending = 0; | ||
437 | ip_flush_pending_frames(sk); | ||
438 | } | ||
439 | } | ||
440 | |||
441 | /** | ||
442 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | ||
443 | * @sk: socket we are sending on | ||
444 | * @skb: sk_buff containing the filled-in UDP header | ||
445 | * (checksum field must be zeroed out) | ||
446 | */ | ||
447 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | ||
448 | __be32 src, __be32 dst, int len ) | ||
449 | { | ||
450 | unsigned int offset; | ||
451 | struct udphdr *uh = udp_hdr(skb); | ||
452 | __wsum csum = 0; | ||
453 | |||
454 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | ||
455 | /* | ||
456 | * Only one fragment on the socket. | ||
457 | */ | ||
458 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
459 | skb->csum_offset = offsetof(struct udphdr, check); | ||
460 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | ||
461 | } else { | ||
462 | /* | ||
463 | * HW-checksum won't work as there are two or more | ||
464 | * fragments on the socket so that all csums of sk_buffs | ||
465 | * should be together | ||
466 | */ | ||
467 | offset = skb_transport_offset(skb); | ||
468 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
469 | |||
470 | skb->ip_summed = CHECKSUM_NONE; | ||
471 | |||
472 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
473 | csum = csum_add(csum, skb->csum); | ||
474 | } | ||
475 | |||
476 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | ||
477 | if (uh->check == 0) | ||
478 | uh->check = CSUM_MANGLED_0; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
484 | */ | ||
485 | static int udp_push_pending_frames(struct sock *sk) | ||
486 | { | ||
487 | struct udp_sock *up = udp_sk(sk); | ||
488 | struct inet_sock *inet = inet_sk(sk); | ||
489 | struct flowi *fl = &inet->cork.fl; | ||
490 | struct sk_buff *skb; | ||
491 | struct udphdr *uh; | ||
492 | int err = 0; | ||
493 | int is_udplite = IS_UDPLITE(sk); | ||
494 | __wsum csum = 0; | ||
495 | |||
496 | /* Grab the skbuff where UDP header space exists. */ | ||
497 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | ||
498 | goto out; | ||
499 | |||
500 | /* | ||
501 | * Create a UDP header | ||
502 | */ | ||
503 | uh = udp_hdr(skb); | ||
504 | uh->source = fl->fl_ip_sport; | ||
505 | uh->dest = fl->fl_ip_dport; | ||
506 | uh->len = htons(up->len); | ||
507 | uh->check = 0; | ||
508 | |||
509 | if (is_udplite) /* UDP-Lite */ | ||
510 | csum = udplite_csum_outgoing(sk, skb); | ||
511 | |||
512 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | ||
513 | |||
514 | skb->ip_summed = CHECKSUM_NONE; | ||
515 | goto send; | ||
516 | |||
517 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | ||
518 | |||
519 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); | ||
520 | goto send; | ||
521 | |||
522 | } else /* `normal' UDP */ | ||
523 | csum = udp_csum_outgoing(sk, skb); | ||
524 | |||
525 | /* add protocol-dependent pseudo-header */ | ||
526 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | ||
527 | sk->sk_protocol, csum ); | ||
528 | if (uh->check == 0) | ||
529 | uh->check = CSUM_MANGLED_0; | ||
530 | |||
531 | send: | ||
532 | err = ip_push_pending_frames(sk); | ||
533 | out: | ||
534 | up->len = 0; | ||
535 | up->pending = 0; | ||
536 | if (!err) | ||
537 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); | ||
538 | return err; | ||
539 | } | ||
540 | |||
541 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
542 | size_t len) | ||
543 | { | ||
544 | struct inet_sock *inet = inet_sk(sk); | ||
545 | struct udp_sock *up = udp_sk(sk); | ||
546 | int ulen = len; | ||
547 | struct ipcm_cookie ipc; | ||
548 | struct rtable *rt = NULL; | ||
549 | int free = 0; | ||
550 | int connected = 0; | ||
551 | __be32 daddr, faddr, saddr; | ||
552 | __be16 dport; | ||
553 | u8 tos; | ||
554 | int err, is_udplite = IS_UDPLITE(sk); | ||
555 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | ||
556 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | ||
557 | |||
558 | if (len > 0xFFFF) | ||
559 | return -EMSGSIZE; | ||
560 | |||
561 | /* | ||
562 | * Check the flags. | ||
563 | */ | ||
564 | |||
565 | if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ | ||
566 | return -EOPNOTSUPP; | ||
567 | |||
568 | ipc.opt = NULL; | ||
569 | |||
570 | if (up->pending) { | ||
571 | /* | ||
572 | * There are pending frames. | ||
573 | * The socket lock must be held while it's corked. | ||
574 | */ | ||
575 | lock_sock(sk); | ||
576 | if (likely(up->pending)) { | ||
577 | if (unlikely(up->pending != AF_INET)) { | ||
578 | release_sock(sk); | ||
579 | return -EINVAL; | ||
580 | } | ||
581 | goto do_append_data; | ||
582 | } | ||
583 | release_sock(sk); | ||
584 | } | ||
585 | ulen += sizeof(struct udphdr); | ||
586 | |||
587 | /* | ||
588 | * Get and verify the address. | ||
589 | */ | ||
590 | if (msg->msg_name) { | ||
591 | struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; | ||
592 | if (msg->msg_namelen < sizeof(*usin)) | ||
593 | return -EINVAL; | ||
594 | if (usin->sin_family != AF_INET) { | ||
595 | if (usin->sin_family != AF_UNSPEC) | ||
596 | return -EAFNOSUPPORT; | ||
597 | } | ||
598 | |||
599 | daddr = usin->sin_addr.s_addr; | ||
600 | dport = usin->sin_port; | ||
601 | if (dport == 0) | ||
602 | return -EINVAL; | ||
603 | } else { | ||
604 | if (sk->sk_state != TCP_ESTABLISHED) | ||
605 | return -EDESTADDRREQ; | ||
606 | daddr = inet->daddr; | ||
607 | dport = inet->dport; | ||
608 | /* Open fast path for connected socket. | ||
609 | Route will not be used, if at least one option is set. | ||
610 | */ | ||
611 | connected = 1; | ||
612 | } | ||
613 | ipc.addr = inet->saddr; | ||
614 | |||
615 | ipc.oif = sk->sk_bound_dev_if; | ||
616 | if (msg->msg_controllen) { | ||
617 | err = ip_cmsg_send(msg, &ipc); | ||
618 | if (err) | ||
619 | return err; | ||
620 | if (ipc.opt) | ||
621 | free = 1; | ||
622 | connected = 0; | ||
623 | } | ||
624 | if (!ipc.opt) | ||
625 | ipc.opt = inet->opt; | ||
626 | |||
627 | saddr = ipc.addr; | ||
628 | ipc.addr = faddr = daddr; | ||
629 | |||
630 | if (ipc.opt && ipc.opt->srr) { | ||
631 | if (!daddr) | ||
632 | return -EINVAL; | ||
633 | faddr = ipc.opt->faddr; | ||
634 | connected = 0; | ||
635 | } | ||
636 | tos = RT_TOS(inet->tos); | ||
637 | if (sock_flag(sk, SOCK_LOCALROUTE) || | ||
638 | (msg->msg_flags & MSG_DONTROUTE) || | ||
639 | (ipc.opt && ipc.opt->is_strictroute)) { | ||
640 | tos |= RTO_ONLINK; | ||
641 | connected = 0; | ||
642 | } | ||
643 | |||
644 | if (ipv4_is_multicast(daddr)) { | ||
645 | if (!ipc.oif) | ||
646 | ipc.oif = inet->mc_index; | ||
647 | if (!saddr) | ||
648 | saddr = inet->mc_addr; | ||
649 | connected = 0; | ||
650 | } | ||
651 | |||
652 | if (connected) | ||
653 | rt = (struct rtable*)sk_dst_check(sk, 0); | ||
654 | |||
655 | if (rt == NULL) { | ||
656 | struct flowi fl = { .oif = ipc.oif, | ||
657 | .nl_u = { .ip4_u = | ||
658 | { .daddr = faddr, | ||
659 | .saddr = saddr, | ||
660 | .tos = tos } }, | ||
661 | .proto = sk->sk_protocol, | ||
662 | .uli_u = { .ports = | ||
663 | { .sport = inet->sport, | ||
664 | .dport = dport } } }; | ||
665 | security_sk_classify_flow(sk, &fl); | ||
666 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); | ||
667 | if (err) { | ||
668 | if (err == -ENETUNREACH) | ||
669 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
670 | goto out; | ||
671 | } | ||
672 | |||
673 | err = -EACCES; | ||
674 | if ((rt->rt_flags & RTCF_BROADCAST) && | ||
675 | !sock_flag(sk, SOCK_BROADCAST)) | ||
676 | goto out; | ||
677 | if (connected) | ||
678 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | ||
679 | } | ||
680 | |||
681 | if (msg->msg_flags&MSG_CONFIRM) | ||
682 | goto do_confirm; | ||
683 | back_from_confirm: | ||
684 | |||
685 | saddr = rt->rt_src; | ||
686 | if (!ipc.addr) | ||
687 | daddr = ipc.addr = rt->rt_dst; | ||
688 | |||
689 | lock_sock(sk); | ||
690 | if (unlikely(up->pending)) { | ||
691 | /* The socket is already corked while preparing it. */ | ||
692 | /* ... which is an evident application bug. --ANK */ | ||
693 | release_sock(sk); | ||
694 | |||
695 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); | ||
696 | err = -EINVAL; | ||
697 | goto out; | ||
698 | } | ||
699 | /* | ||
700 | * Now cork the socket to pend data. | ||
701 | */ | ||
702 | inet->cork.fl.fl4_dst = daddr; | ||
703 | inet->cork.fl.fl_ip_dport = dport; | ||
704 | inet->cork.fl.fl4_src = saddr; | ||
705 | inet->cork.fl.fl_ip_sport = inet->sport; | ||
706 | up->pending = AF_INET; | ||
707 | |||
708 | do_append_data: | ||
709 | up->len += ulen; | ||
710 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
711 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | ||
712 | sizeof(struct udphdr), &ipc, rt, | ||
713 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | ||
714 | if (err) | ||
715 | udp_flush_pending_frames(sk); | ||
716 | else if (!corkreq) | ||
717 | err = udp_push_pending_frames(sk); | ||
718 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | ||
719 | up->pending = 0; | ||
720 | release_sock(sk); | ||
721 | |||
722 | out: | ||
723 | ip_rt_put(rt); | ||
724 | if (free) | ||
725 | kfree(ipc.opt); | ||
726 | if (!err) | ||
727 | return len; | ||
728 | /* | ||
729 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | ||
730 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | ||
731 | * we don't have a good statistic (IpOutDiscards but it can be too many | ||
732 | * things). We could add another new stat but at least for now that | ||
733 | * seems like overkill. | ||
734 | */ | ||
735 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
736 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); | ||
737 | } | ||
738 | return err; | ||
739 | |||
740 | do_confirm: | ||
741 | dst_confirm(&rt->u.dst); | ||
742 | if (!(msg->msg_flags&MSG_PROBE) || len) | ||
743 | goto back_from_confirm; | ||
744 | err = 0; | ||
745 | goto out; | ||
746 | } | ||
747 | |||
748 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | ||
749 | size_t size, int flags) | ||
750 | { | ||
751 | struct udp_sock *up = udp_sk(sk); | ||
752 | int ret; | ||
753 | |||
754 | if (!up->pending) { | ||
755 | struct msghdr msg = { .msg_flags = flags|MSG_MORE }; | ||
756 | |||
757 | /* Call udp_sendmsg to specify destination address which | ||
758 | * sendpage interface can't pass. | ||
759 | * This will succeed only when the socket is connected. | ||
760 | */ | ||
761 | ret = udp_sendmsg(NULL, sk, &msg, 0); | ||
762 | if (ret < 0) | ||
763 | return ret; | ||
764 | } | ||
765 | |||
766 | lock_sock(sk); | ||
767 | |||
768 | if (unlikely(!up->pending)) { | ||
769 | release_sock(sk); | ||
770 | |||
771 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); | ||
772 | return -EINVAL; | ||
773 | } | ||
774 | |||
775 | ret = ip_append_page(sk, page, offset, size, flags); | ||
776 | if (ret == -EOPNOTSUPP) { | ||
777 | release_sock(sk); | ||
778 | return sock_no_sendpage(sk->sk_socket, page, offset, | ||
779 | size, flags); | ||
780 | } | ||
781 | if (ret < 0) { | ||
782 | udp_flush_pending_frames(sk); | ||
783 | goto out; | ||
784 | } | ||
785 | |||
786 | up->len += size; | ||
787 | if (!(up->corkflag || (flags&MSG_MORE))) | ||
788 | ret = udp_push_pending_frames(sk); | ||
789 | if (!ret) | ||
790 | ret = size; | ||
791 | out: | ||
792 | release_sock(sk); | ||
793 | return ret; | ||
794 | } | ||
795 | |||
796 | /* | 249 | /* |
797 | * IOCTL requests applicable to the UDP protocol | 250 | * IOCTL requests applicable to the UDP protocol |
798 | */ | 251 | */ |
@@ -833,107 +286,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
833 | return 0; | 286 | return 0; |
834 | } | 287 | } |
835 | 288 | ||
836 | /* | ||
837 | * This should be easy, if there is something there we | ||
838 | * return it, otherwise we block. | ||
839 | */ | ||
840 | |||
841 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
842 | size_t len, int noblock, int flags, int *addr_len) | ||
843 | { | ||
844 | struct inet_sock *inet = inet_sk(sk); | ||
845 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | ||
846 | struct sk_buff *skb; | ||
847 | unsigned int ulen, copied; | ||
848 | int peeked; | ||
849 | int err; | ||
850 | int is_udplite = IS_UDPLITE(sk); | ||
851 | |||
852 | /* | ||
853 | * Check any passed addresses | ||
854 | */ | ||
855 | if (addr_len) | ||
856 | *addr_len=sizeof(*sin); | ||
857 | |||
858 | if (flags & MSG_ERRQUEUE) | ||
859 | return ip_recv_error(sk, msg, len); | ||
860 | |||
861 | try_again: | ||
862 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | ||
863 | &peeked, &err); | ||
864 | if (!skb) | ||
865 | goto out; | ||
866 | |||
867 | ulen = skb->len - sizeof(struct udphdr); | ||
868 | copied = len; | ||
869 | if (copied > ulen) | ||
870 | copied = ulen; | ||
871 | else if (copied < ulen) | ||
872 | msg->msg_flags |= MSG_TRUNC; | ||
873 | |||
874 | /* | ||
875 | * If checksum is needed at all, try to do it while copying the | ||
876 | * data. If the data is truncated, or if we only want a partial | ||
877 | * coverage checksum (UDP-Lite), do it before the copy. | ||
878 | */ | ||
879 | |||
880 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | ||
881 | if (udp_lib_checksum_complete(skb)) | ||
882 | goto csum_copy_err; | ||
883 | } | ||
884 | |||
885 | if (skb_csum_unnecessary(skb)) | ||
886 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | ||
887 | msg->msg_iov, copied ); | ||
888 | else { | ||
889 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | ||
890 | |||
891 | if (err == -EINVAL) | ||
892 | goto csum_copy_err; | ||
893 | } | ||
894 | |||
895 | if (err) | ||
896 | goto out_free; | ||
897 | |||
898 | if (!peeked) | ||
899 | UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); | ||
900 | |||
901 | sock_recv_timestamp(msg, sk, skb); | ||
902 | |||
903 | /* Copy the address. */ | ||
904 | if (sin) | ||
905 | { | ||
906 | sin->sin_family = AF_INET; | ||
907 | sin->sin_port = udp_hdr(skb)->source; | ||
908 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | ||
909 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
910 | } | ||
911 | if (inet->cmsg_flags) | ||
912 | ip_cmsg_recv(msg, skb); | ||
913 | |||
914 | err = copied; | ||
915 | if (flags & MSG_TRUNC) | ||
916 | err = ulen; | ||
917 | |||
918 | out_free: | ||
919 | lock_sock(sk); | ||
920 | skb_free_datagram(sk, skb); | ||
921 | release_sock(sk); | ||
922 | out: | ||
923 | return err; | ||
924 | |||
925 | csum_copy_err: | ||
926 | lock_sock(sk); | ||
927 | if (!skb_kill_datagram(sk, skb, flags)) | ||
928 | UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); | ||
929 | release_sock(sk); | ||
930 | |||
931 | if (noblock) | ||
932 | return -EAGAIN; | ||
933 | goto try_again; | ||
934 | } | ||
935 | |||
936 | |||
937 | int udp_disconnect(struct sock *sk, int flags) | 289 | int udp_disconnect(struct sock *sk, int flags) |
938 | { | 290 | { |
939 | struct inet_sock *inet = inet_sk(sk); | 291 | struct inet_sock *inet = inet_sk(sk); |
@@ -956,319 +308,6 @@ int udp_disconnect(struct sock *sk, int flags) | |||
956 | return 0; | 308 | return 0; |
957 | } | 309 | } |
958 | 310 | ||
959 | /* returns: | ||
960 | * -1: error | ||
961 | * 0: success | ||
962 | * >0: "udp encap" protocol resubmission | ||
963 | * | ||
964 | * Note that in the success and error cases, the skb is assumed to | ||
965 | * have either been requeued or freed. | ||
966 | */ | ||
967 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | ||
968 | { | ||
969 | struct udp_sock *up = udp_sk(sk); | ||
970 | int rc; | ||
971 | int is_udplite = IS_UDPLITE(sk); | ||
972 | |||
973 | /* | ||
974 | * Charge it to the socket, dropping if the queue is full. | ||
975 | */ | ||
976 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | ||
977 | goto drop; | ||
978 | nf_reset(skb); | ||
979 | |||
980 | if (up->encap_type) { | ||
981 | /* | ||
982 | * This is an encapsulation socket so pass the skb to | ||
983 | * the socket's udp_encap_rcv() hook. Otherwise, just | ||
984 | * fall through and pass this up the UDP socket. | ||
985 | * up->encap_rcv() returns the following value: | ||
986 | * =0 if skb was successfully passed to the encap | ||
987 | * handler or was discarded by it. | ||
988 | * >0 if skb should be passed on to UDP. | ||
989 | * <0 if skb should be resubmitted as proto -N | ||
990 | */ | ||
991 | |||
992 | /* if we're overly short, let UDP handle it */ | ||
993 | if (skb->len > sizeof(struct udphdr) && | ||
994 | up->encap_rcv != NULL) { | ||
995 | int ret; | ||
996 | |||
997 | ret = (*up->encap_rcv)(sk, skb); | ||
998 | if (ret <= 0) { | ||
999 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, | ||
1000 | is_udplite); | ||
1001 | return -ret; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | /* FALLTHROUGH -- it's a UDP Packet */ | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * UDP-Lite specific tests, ignored on UDP sockets | ||
1010 | */ | ||
1011 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { | ||
1012 | |||
1013 | /* | ||
1014 | * MIB statistics other than incrementing the error count are | ||
1015 | * disabled for the following two types of errors: these depend | ||
1016 | * on the application settings, not on the functioning of the | ||
1017 | * protocol stack as such. | ||
1018 | * | ||
1019 | * RFC 3828 here recommends (sec 3.3): "There should also be a | ||
1020 | * way ... to ... at least let the receiving application block | ||
1021 | * delivery of packets with coverage values less than a value | ||
1022 | * provided by the application." | ||
1023 | */ | ||
1024 | if (up->pcrlen == 0) { /* full coverage was set */ | ||
1025 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | ||
1026 | "%d while full coverage %d requested\n", | ||
1027 | UDP_SKB_CB(skb)->cscov, skb->len); | ||
1028 | goto drop; | ||
1029 | } | ||
1030 | /* The next case involves violating the min. coverage requested | ||
1031 | * by the receiver. This is subtle: if receiver wants x and x is | ||
1032 | * greater than the buffersize/MTU then receiver will complain | ||
1033 | * that it wants x while sender emits packets of smaller size y. | ||
1034 | * Therefore the above ...()->partial_cov statement is essential. | ||
1035 | */ | ||
1036 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | ||
1037 | LIMIT_NETDEBUG(KERN_WARNING | ||
1038 | "UDPLITE: coverage %d too small, need min %d\n", | ||
1039 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | ||
1040 | goto drop; | ||
1041 | } | ||
1042 | } | ||
1043 | |||
1044 | if (sk->sk_filter) { | ||
1045 | if (udp_lib_checksum_complete(skb)) | ||
1046 | goto drop; | ||
1047 | } | ||
1048 | |||
1049 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | ||
1050 | /* Note that an ENOMEM error is charged twice */ | ||
1051 | if (rc == -ENOMEM) | ||
1052 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); | ||
1053 | goto drop; | ||
1054 | } | ||
1055 | |||
1056 | return 0; | ||
1057 | |||
1058 | drop: | ||
1059 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); | ||
1060 | kfree_skb(skb); | ||
1061 | return -1; | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
1065 | * Multicasts and broadcasts go to each listener. | ||
1066 | * | ||
1067 | * Note: called only from the BH handler context, | ||
1068 | * so we don't need to lock the hashes. | ||
1069 | */ | ||
1070 | static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | ||
1071 | struct udphdr *uh, | ||
1072 | __be32 saddr, __be32 daddr, | ||
1073 | struct hlist_head udptable[]) | ||
1074 | { | ||
1075 | struct sock *sk; | ||
1076 | int dif; | ||
1077 | |||
1078 | read_lock(&udp_hash_lock); | ||
1079 | sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); | ||
1080 | dif = skb->dev->ifindex; | ||
1081 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); | ||
1082 | if (sk) { | ||
1083 | struct sock *sknext = NULL; | ||
1084 | |||
1085 | do { | ||
1086 | struct sk_buff *skb1 = skb; | ||
1087 | |||
1088 | sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, | ||
1089 | uh->source, saddr, dif); | ||
1090 | if (sknext) | ||
1091 | skb1 = skb_clone(skb, GFP_ATOMIC); | ||
1092 | |||
1093 | if (skb1) { | ||
1094 | int ret = 0; | ||
1095 | |||
1096 | bh_lock_sock_nested(sk); | ||
1097 | if (!sock_owned_by_user(sk)) | ||
1098 | ret = udp_queue_rcv_skb(sk, skb1); | ||
1099 | else | ||
1100 | sk_add_backlog(sk, skb1); | ||
1101 | bh_unlock_sock(sk); | ||
1102 | |||
1103 | if (ret > 0) | ||
1104 | /* we should probably re-process instead | ||
1105 | * of dropping packets here. */ | ||
1106 | kfree_skb(skb1); | ||
1107 | } | ||
1108 | sk = sknext; | ||
1109 | } while (sknext); | ||
1110 | } else | ||
1111 | kfree_skb(skb); | ||
1112 | read_unlock(&udp_hash_lock); | ||
1113 | return 0; | ||
1114 | } | ||
1115 | |||
1116 | /* Initialize UDP checksum. If exited with zero value (success), | ||
1117 | * CHECKSUM_UNNECESSARY means, that no more checks are required. | ||
1118 | * Otherwise, csum completion requires chacksumming packet body, | ||
1119 | * including udp header and folding it to skb->csum. | ||
1120 | */ | ||
1121 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | ||
1122 | int proto) | ||
1123 | { | ||
1124 | const struct iphdr *iph; | ||
1125 | int err; | ||
1126 | |||
1127 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
1128 | UDP_SKB_CB(skb)->cscov = skb->len; | ||
1129 | |||
1130 | if (proto == IPPROTO_UDPLITE) { | ||
1131 | err = udplite_checksum_init(skb, uh); | ||
1132 | if (err) | ||
1133 | return err; | ||
1134 | } | ||
1135 | |||
1136 | iph = ip_hdr(skb); | ||
1137 | if (uh->check == 0) { | ||
1138 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1139 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
1140 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
1141 | proto, skb->csum)) | ||
1142 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1143 | } | ||
1144 | if (!skb_csum_unnecessary(skb)) | ||
1145 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
1146 | skb->len, proto, 0); | ||
1147 | /* Probably, we should checksum udp header (it should be in cache | ||
1148 | * in any case) and data in tiny packets (< rx copybreak). | ||
1149 | */ | ||
1150 | |||
1151 | return 0; | ||
1152 | } | ||
1153 | |||
1154 | /* | ||
1155 | * All we need to do is get the socket, and then do a checksum. | ||
1156 | */ | ||
1157 | |||
1158 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | ||
1159 | int proto) | ||
1160 | { | ||
1161 | struct sock *sk; | ||
1162 | struct udphdr *uh = udp_hdr(skb); | ||
1163 | unsigned short ulen; | ||
1164 | struct rtable *rt = (struct rtable*)skb->dst; | ||
1165 | __be32 saddr = ip_hdr(skb)->saddr; | ||
1166 | __be32 daddr = ip_hdr(skb)->daddr; | ||
1167 | |||
1168 | /* | ||
1169 | * Validate the packet. | ||
1170 | */ | ||
1171 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | ||
1172 | goto drop; /* No space for header. */ | ||
1173 | |||
1174 | ulen = ntohs(uh->len); | ||
1175 | if (ulen > skb->len) | ||
1176 | goto short_packet; | ||
1177 | |||
1178 | if (proto == IPPROTO_UDP) { | ||
1179 | /* UDP validates ulen. */ | ||
1180 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | ||
1181 | goto short_packet; | ||
1182 | uh = udp_hdr(skb); | ||
1183 | } | ||
1184 | |||
1185 | if (udp4_csum_init(skb, uh, proto)) | ||
1186 | goto csum_error; | ||
1187 | |||
1188 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | ||
1189 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | ||
1190 | |||
1191 | sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, | ||
1192 | uh->dest, inet_iif(skb), udptable); | ||
1193 | |||
1194 | if (sk != NULL) { | ||
1195 | int ret = 0; | ||
1196 | bh_lock_sock_nested(sk); | ||
1197 | if (!sock_owned_by_user(sk)) | ||
1198 | ret = udp_queue_rcv_skb(sk, skb); | ||
1199 | else | ||
1200 | sk_add_backlog(sk, skb); | ||
1201 | bh_unlock_sock(sk); | ||
1202 | sock_put(sk); | ||
1203 | |||
1204 | /* a return value > 0 means to resubmit the input, but | ||
1205 | * it wants the return to be -protocol, or 0 | ||
1206 | */ | ||
1207 | if (ret > 0) | ||
1208 | return -ret; | ||
1209 | return 0; | ||
1210 | } | ||
1211 | |||
1212 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
1213 | goto drop; | ||
1214 | nf_reset(skb); | ||
1215 | |||
1216 | /* No socket. Drop packet silently, if checksum is wrong */ | ||
1217 | if (udp_lib_checksum_complete(skb)) | ||
1218 | goto csum_error; | ||
1219 | |||
1220 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); | ||
1221 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
1222 | |||
1223 | /* | ||
1224 | * Hmm. We got an UDP packet to a port to which we | ||
1225 | * don't wanna listen. Ignore it. | ||
1226 | */ | ||
1227 | kfree_skb(skb); | ||
1228 | return 0; | ||
1229 | |||
1230 | short_packet: | ||
1231 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | ||
1232 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | ||
1233 | NIPQUAD(saddr), | ||
1234 | ntohs(uh->source), | ||
1235 | ulen, | ||
1236 | skb->len, | ||
1237 | NIPQUAD(daddr), | ||
1238 | ntohs(uh->dest)); | ||
1239 | goto drop; | ||
1240 | |||
1241 | csum_error: | ||
1242 | /* | ||
1243 | * RFC1122: OK. Discards the bad packet silently (as far as | ||
1244 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | ||
1245 | */ | ||
1246 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | ||
1247 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | ||
1248 | NIPQUAD(saddr), | ||
1249 | ntohs(uh->source), | ||
1250 | NIPQUAD(daddr), | ||
1251 | ntohs(uh->dest), | ||
1252 | ulen); | ||
1253 | drop: | ||
1254 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); | ||
1255 | kfree_skb(skb); | ||
1256 | return 0; | ||
1257 | } | ||
1258 | |||
1259 | int udp_rcv(struct sk_buff *skb) | ||
1260 | { | ||
1261 | return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); | ||
1262 | } | ||
1263 | |||
1264 | int udp_destroy_sock(struct sock *sk) | ||
1265 | { | ||
1266 | lock_sock(sk); | ||
1267 | udp_flush_pending_frames(sk); | ||
1268 | release_sock(sk); | ||
1269 | return 0; | ||
1270 | } | ||
1271 | |||
1272 | /* | 311 | /* |
1273 | * Socket option code for UDP | 312 | * Socket option code for UDP |
1274 | */ | 313 | */ |
@@ -1279,7 +318,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1279 | struct udp_sock *up = udp_sk(sk); | 318 | struct udp_sock *up = udp_sk(sk); |
1280 | int val; | 319 | int val; |
1281 | int err = 0; | 320 | int err = 0; |
321 | #ifdef CONFIG_IP_UDPLITE | ||
1282 | int is_udplite = IS_UDPLITE(sk); | 322 | int is_udplite = IS_UDPLITE(sk); |
323 | #endif | ||
1283 | 324 | ||
1284 | if (optlen<sizeof(int)) | 325 | if (optlen<sizeof(int)) |
1285 | return -EINVAL; | 326 | return -EINVAL; |
@@ -1315,6 +356,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1315 | } | 356 | } |
1316 | break; | 357 | break; |
1317 | 358 | ||
359 | #ifdef CONFIG_IP_UDPLITE | ||
1318 | /* | 360 | /* |
1319 | * UDP-Lite's partial checksum coverage (RFC 3828). | 361 | * UDP-Lite's partial checksum coverage (RFC 3828). |
1320 | */ | 362 | */ |
@@ -1340,6 +382,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1340 | up->pcrlen = val; | 382 | up->pcrlen = val; |
1341 | up->pcflag |= UDPLITE_RECV_CC; | 383 | up->pcflag |= UDPLITE_RECV_CC; |
1342 | break; | 384 | break; |
385 | #endif | ||
1343 | 386 | ||
1344 | default: | 387 | default: |
1345 | err = -ENOPROTOOPT; | 388 | err = -ENOPROTOOPT; |
@@ -1349,26 +392,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1349 | return err; | 392 | return err; |
1350 | } | 393 | } |
1351 | 394 | ||
1352 | int udp_setsockopt(struct sock *sk, int level, int optname, | ||
1353 | char __user *optval, int optlen) | ||
1354 | { | ||
1355 | if (level == SOL_UDP || level == SOL_UDPLITE) | ||
1356 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | ||
1357 | udp_push_pending_frames); | ||
1358 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
1359 | } | ||
1360 | |||
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): identical dispatch, except that
 * levels other than SOL_UDP / SOL_UDPLITE go to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1371 | |||
1372 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, | 395 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, |
1373 | char __user *optval, int __user *optlen) | 396 | char __user *optval, int __user *optlen) |
1374 | { | 397 | { |
@@ -1413,23 +436,6 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1413 | return 0; | 436 | return 0; |
1414 | } | 437 | } |
1415 | 438 | ||
1416 | int udp_getsockopt(struct sock *sk, int level, int optname, | ||
1417 | char __user *optval, int __user *optlen) | ||
1418 | { | ||
1419 | if (level == SOL_UDP || level == SOL_UDPLITE) | ||
1420 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | ||
1421 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
1422 | } | ||
1423 | |||
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): identical dispatch, except that
 * levels other than SOL_UDP / SOL_UDPLITE go to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1433 | /** | 439 | /** |
1434 | * udp_poll - wait for a UDP event. | 440 | * udp_poll - wait for a UDP event. |
1435 | * @file - file struct | 441 | * @file - file struct |
@@ -1474,36 +480,6 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1474 | 480 | ||
1475 | } | 481 | } |
1476 | 482 | ||
1477 | DEFINE_PROTO_INUSE(udp) | ||
1478 | |||
1479 | struct proto udp_prot = { | ||
1480 | .name = "UDP", | ||
1481 | .owner = THIS_MODULE, | ||
1482 | .close = udp_lib_close, | ||
1483 | .connect = ip4_datagram_connect, | ||
1484 | .disconnect = udp_disconnect, | ||
1485 | .ioctl = udp_ioctl, | ||
1486 | .destroy = udp_destroy_sock, | ||
1487 | .setsockopt = udp_setsockopt, | ||
1488 | .getsockopt = udp_getsockopt, | ||
1489 | .sendmsg = udp_sendmsg, | ||
1490 | .recvmsg = udp_recvmsg, | ||
1491 | .sendpage = udp_sendpage, | ||
1492 | .backlog_rcv = udp_queue_rcv_skb, | ||
1493 | .hash = udp_lib_hash, | ||
1494 | .unhash = udp_lib_unhash, | ||
1495 | .get_port = udp_v4_get_port, | ||
1496 | .memory_allocated = &udp_memory_allocated, | ||
1497 | .sysctl_mem = sysctl_udp_mem, | ||
1498 | .sysctl_wmem = &sysctl_udp_wmem_min, | ||
1499 | .sysctl_rmem = &sysctl_udp_rmem_min, | ||
1500 | .obj_size = sizeof(struct udp_sock), | ||
1501 | #ifdef CONFIG_COMPAT | ||
1502 | .compat_setsockopt = compat_udp_setsockopt, | ||
1503 | .compat_getsockopt = compat_udp_getsockopt, | ||
1504 | #endif | ||
1505 | REF_PROTO_INUSE(udp) | ||
1506 | }; | ||
1507 | 483 | ||
1508 | /* ------------------------------------------------------------------------ */ | 484 | /* ------------------------------------------------------------------------ */ |
1509 | #ifdef CONFIG_PROC_FS | 485 | #ifdef CONFIG_PROC_FS |
@@ -1636,62 +612,6 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) | |||
1636 | proc_net_remove(&init_net, afinfo->name); | 612 | proc_net_remove(&init_net, afinfo->name); |
1637 | memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); | 613 | memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); |
1638 | } | 614 | } |
1639 | |||
1640 | /* ------------------------------------------------------------------------ */ | ||
1641 | static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) | ||
1642 | { | ||
1643 | struct inet_sock *inet = inet_sk(sp); | ||
1644 | __be32 dest = inet->daddr; | ||
1645 | __be32 src = inet->rcv_saddr; | ||
1646 | __u16 destp = ntohs(inet->dport); | ||
1647 | __u16 srcp = ntohs(inet->sport); | ||
1648 | |||
1649 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" | ||
1650 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", | ||
1651 | bucket, src, srcp, dest, destp, sp->sk_state, | ||
1652 | atomic_read(&sp->sk_wmem_alloc), | ||
1653 | atomic_read(&sp->sk_rmem_alloc), | ||
1654 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), | ||
1655 | atomic_read(&sp->sk_refcnt), sp); | ||
1656 | } | ||
1657 | |||
1658 | int udp4_seq_show(struct seq_file *seq, void *v) | ||
1659 | { | ||
1660 | if (v == SEQ_START_TOKEN) | ||
1661 | seq_printf(seq, "%-127s\n", | ||
1662 | " sl local_address rem_address st tx_queue " | ||
1663 | "rx_queue tr tm->when retrnsmt uid timeout " | ||
1664 | "inode"); | ||
1665 | else { | ||
1666 | char tmpbuf[129]; | ||
1667 | struct udp_iter_state *state = seq->private; | ||
1668 | |||
1669 | udp4_format_sock(v, tmpbuf, state->bucket); | ||
1670 | seq_printf(seq, "%-127s\n", tmpbuf); | ||
1671 | } | ||
1672 | return 0; | ||
1673 | } | ||
1674 | |||
1675 | /* ------------------------------------------------------------------------ */ | ||
1676 | static struct file_operations udp4_seq_fops; | ||
1677 | static struct udp_seq_afinfo udp4_seq_afinfo = { | ||
1678 | .owner = THIS_MODULE, | ||
1679 | .name = "udp", | ||
1680 | .family = AF_INET, | ||
1681 | .hashtable = udp_hash, | ||
1682 | .seq_show = udp4_seq_show, | ||
1683 | .seq_fops = &udp4_seq_fops, | ||
1684 | }; | ||
1685 | |||
1686 | int __init udp4_proc_init(void) | ||
1687 | { | ||
1688 | return udp_proc_register(&udp4_seq_afinfo); | ||
1689 | } | ||
1690 | |||
1691 | void udp4_proc_exit(void) | ||
1692 | { | ||
1693 | udp_proc_unregister(&udp4_seq_afinfo); | ||
1694 | } | ||
1695 | #endif /* CONFIG_PROC_FS */ | 615 | #endif /* CONFIG_PROC_FS */ |
1696 | 616 | ||
1697 | void __init udp_init(void) | 617 | void __init udp_init(void) |
@@ -1718,8 +638,6 @@ EXPORT_SYMBOL(udp_hash); | |||
1718 | EXPORT_SYMBOL(udp_hash_lock); | 638 | EXPORT_SYMBOL(udp_hash_lock); |
1719 | EXPORT_SYMBOL(udp_ioctl); | 639 | EXPORT_SYMBOL(udp_ioctl); |
1720 | EXPORT_SYMBOL(udp_get_port); | 640 | EXPORT_SYMBOL(udp_get_port); |
1721 | EXPORT_SYMBOL(udp_prot); | ||
1722 | EXPORT_SYMBOL(udp_sendmsg); | ||
1723 | EXPORT_SYMBOL(udp_lib_getsockopt); | 641 | EXPORT_SYMBOL(udp_lib_getsockopt); |
1724 | EXPORT_SYMBOL(udp_lib_setsockopt); | 642 | EXPORT_SYMBOL(udp_lib_setsockopt); |
1725 | EXPORT_SYMBOL(udp_poll); | 643 | EXPORT_SYMBOL(udp_poll); |
diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c new file mode 100644 index 000000000000..40978de7fb51 --- /dev/null +++ b/net/ipv4/udp_ipv4.c | |||
@@ -0,0 +1,1134 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * UDP for IPv4. | ||
7 | * | ||
8 | * For full credits, see net/ipv4/udp.c. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <asm/system.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/ioctls.h> | ||
19 | #include <linux/bootmem.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/fcntl.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/sockios.h> | ||
25 | #include <linux/igmp.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/inet.h> | ||
31 | #include <linux/netdevice.h> | ||
32 | #include <net/tcp_states.h> | ||
33 | #include <linux/skbuff.h> | ||
34 | #include <linux/proc_fs.h> | ||
35 | #include <linux/seq_file.h> | ||
36 | #include <net/net_namespace.h> | ||
37 | #include <net/icmp.h> | ||
38 | #include <net/route.h> | ||
39 | #include <net/checksum.h> | ||
40 | #include <net/xfrm.h> | ||
41 | #include "udp_impl.h" | ||
42 | |||
43 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
44 | { | ||
45 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | ||
46 | |||
47 | return ( !ipv6_only_sock(sk2) && | ||
48 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | ||
49 | inet1->rcv_saddr == inet2->rcv_saddr )); | ||
50 | } | ||
51 | |||
52 | static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | ||
53 | { | ||
54 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | ||
55 | } | ||
56 | |||
57 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | ||
58 | * harder than this. -DaveM | ||
59 | */ | ||
60 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | ||
61 | __be16 sport, __be32 daddr, __be16 dport, | ||
62 | int dif, struct hlist_head udptable[]) | ||
63 | { | ||
64 | struct sock *sk, *result = NULL; | ||
65 | struct hlist_node *node; | ||
66 | unsigned short hnum = ntohs(dport); | ||
67 | int badness = -1; | ||
68 | |||
69 | read_lock(&udp_hash_lock); | ||
70 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | ||
71 | struct inet_sock *inet = inet_sk(sk); | ||
72 | |||
73 | if (sk->sk_net == net && sk->sk_hash == hnum && | ||
74 | !ipv6_only_sock(sk)) { | ||
75 | int score = (sk->sk_family == PF_INET ? 1 : 0); | ||
76 | if (inet->rcv_saddr) { | ||
77 | if (inet->rcv_saddr != daddr) | ||
78 | continue; | ||
79 | score+=2; | ||
80 | } | ||
81 | if (inet->daddr) { | ||
82 | if (inet->daddr != saddr) | ||
83 | continue; | ||
84 | score+=2; | ||
85 | } | ||
86 | if (inet->dport) { | ||
87 | if (inet->dport != sport) | ||
88 | continue; | ||
89 | score+=2; | ||
90 | } | ||
91 | if (sk->sk_bound_dev_if) { | ||
92 | if (sk->sk_bound_dev_if != dif) | ||
93 | continue; | ||
94 | score+=2; | ||
95 | } | ||
96 | if (score == 9) { | ||
97 | result = sk; | ||
98 | break; | ||
99 | } else if (score > badness) { | ||
100 | result = sk; | ||
101 | badness = score; | ||
102 | } | ||
103 | } | ||
104 | } | ||
105 | if (result) | ||
106 | sock_hold(result); | ||
107 | read_unlock(&udp_hash_lock); | ||
108 | return result; | ||
109 | } | ||
110 | |||
111 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | ||
112 | __be16 loc_port, __be32 loc_addr, | ||
113 | __be16 rmt_port, __be32 rmt_addr, | ||
114 | int dif) | ||
115 | { | ||
116 | struct hlist_node *node; | ||
117 | struct sock *s = sk; | ||
118 | unsigned short hnum = ntohs(loc_port); | ||
119 | |||
120 | sk_for_each_from(s, node) { | ||
121 | struct inet_sock *inet = inet_sk(s); | ||
122 | |||
123 | if (s->sk_hash != hnum || | ||
124 | (inet->daddr && inet->daddr != rmt_addr) || | ||
125 | (inet->dport != rmt_port && inet->dport) || | ||
126 | (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || | ||
127 | ipv6_only_sock(s) || | ||
128 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) | ||
129 | continue; | ||
130 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) | ||
131 | continue; | ||
132 | goto found; | ||
133 | } | ||
134 | s = NULL; | ||
135 | found: | ||
136 | return s; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * This routine is called by the ICMP module when it gets some | ||
141 | * sort of error condition. If err < 0 then the socket should | ||
142 | * be closed and the error returned to the user. If err > 0 | ||
143 | * it's just the icmp type << 8 | icmp code. | ||
144 | * Header points to the ip header of the error packet. We move | ||
145 | * on past this. Then (as it used to claim before adjustment) | ||
146 | * header points to the first 8 bytes of the udp header. We need | ||
147 | * to find the appropriate port. | ||
148 | */ | ||
149 | |||
150 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | ||
151 | { | ||
152 | struct inet_sock *inet; | ||
153 | struct iphdr *iph = (struct iphdr*)skb->data; | ||
154 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | ||
155 | const int type = icmp_hdr(skb)->type; | ||
156 | const int code = icmp_hdr(skb)->code; | ||
157 | struct sock *sk; | ||
158 | int harderr; | ||
159 | int err; | ||
160 | |||
161 | sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, | ||
162 | iph->saddr, uh->source, skb->dev->ifindex, udptable); | ||
163 | if (sk == NULL) { | ||
164 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
165 | return; /* No socket for error */ | ||
166 | } | ||
167 | |||
168 | err = 0; | ||
169 | harderr = 0; | ||
170 | inet = inet_sk(sk); | ||
171 | |||
172 | switch (type) { | ||
173 | default: | ||
174 | case ICMP_TIME_EXCEEDED: | ||
175 | err = EHOSTUNREACH; | ||
176 | break; | ||
177 | case ICMP_SOURCE_QUENCH: | ||
178 | goto out; | ||
179 | case ICMP_PARAMETERPROB: | ||
180 | err = EPROTO; | ||
181 | harderr = 1; | ||
182 | break; | ||
183 | case ICMP_DEST_UNREACH: | ||
184 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | ||
185 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { | ||
186 | err = EMSGSIZE; | ||
187 | harderr = 1; | ||
188 | break; | ||
189 | } | ||
190 | goto out; | ||
191 | } | ||
192 | err = EHOSTUNREACH; | ||
193 | if (code <= NR_ICMP_UNREACH) { | ||
194 | harderr = icmp_err_convert[code].fatal; | ||
195 | err = icmp_err_convert[code].errno; | ||
196 | } | ||
197 | break; | ||
198 | } | ||
199 | |||
200 | /* | ||
201 | * RFC1122: OK. Passes ICMP errors back to application, as per | ||
202 | * 4.1.3.3. | ||
203 | */ | ||
204 | if (!inet->recverr) { | ||
205 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | ||
206 | goto out; | ||
207 | } else { | ||
208 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); | ||
209 | } | ||
210 | sk->sk_err = err; | ||
211 | sk->sk_error_report(sk); | ||
212 | out: | ||
213 | sock_put(sk); | ||
214 | } | ||
215 | |||
216 | void udp_err(struct sk_buff *skb, u32 info) | ||
217 | { | ||
218 | __udp4_lib_err(skb, info, udp_hash); | ||
219 | } | ||
220 | |||
221 | /* | ||
222 | * Throw away all pending data and cancel the corking. Socket is locked. | ||
223 | */ | ||
224 | static void udp_flush_pending_frames(struct sock *sk) | ||
225 | { | ||
226 | struct udp_sock *up = udp_sk(sk); | ||
227 | |||
228 | if (up->pending) { | ||
229 | up->len = 0; | ||
230 | up->pending = 0; | ||
231 | ip_flush_pending_frames(sk); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | ||
237 | * @sk: socket we are sending on | ||
238 | * @skb: sk_buff containing the filled-in UDP header | ||
239 | * (checksum field must be zeroed out) | ||
240 | */ | ||
241 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | ||
242 | __be32 src, __be32 dst, int len ) | ||
243 | { | ||
244 | unsigned int offset; | ||
245 | struct udphdr *uh = udp_hdr(skb); | ||
246 | __wsum csum = 0; | ||
247 | |||
248 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | ||
249 | /* | ||
250 | * Only one fragment on the socket. | ||
251 | */ | ||
252 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
253 | skb->csum_offset = offsetof(struct udphdr, check); | ||
254 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | ||
255 | } else { | ||
256 | /* | ||
257 | * HW-checksum won't work as there are two or more | ||
258 | * fragments on the socket so that all csums of sk_buffs | ||
259 | * should be together | ||
260 | */ | ||
261 | offset = skb_transport_offset(skb); | ||
262 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
263 | |||
264 | skb->ip_summed = CHECKSUM_NONE; | ||
265 | |||
266 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
267 | csum = csum_add(csum, skb->csum); | ||
268 | } | ||
269 | |||
270 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | ||
271 | if (uh->check == 0) | ||
272 | uh->check = CSUM_MANGLED_0; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
278 | */ | ||
279 | static int udp_push_pending_frames(struct sock *sk) | ||
280 | { | ||
281 | struct udp_sock *up = udp_sk(sk); | ||
282 | struct inet_sock *inet = inet_sk(sk); | ||
283 | struct flowi *fl = &inet->cork.fl; | ||
284 | struct sk_buff *skb; | ||
285 | struct udphdr *uh; | ||
286 | int err = 0; | ||
287 | int is_udplite = IS_UDPLITE(sk); | ||
288 | __wsum csum = 0; | ||
289 | |||
290 | /* Grab the skbuff where UDP header space exists. */ | ||
291 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | ||
292 | goto out; | ||
293 | |||
294 | /* | ||
295 | * Create a UDP header | ||
296 | */ | ||
297 | uh = udp_hdr(skb); | ||
298 | uh->source = fl->fl_ip_sport; | ||
299 | uh->dest = fl->fl_ip_dport; | ||
300 | uh->len = htons(up->len); | ||
301 | uh->check = 0; | ||
302 | |||
303 | if (is_udplite) /* UDP-Lite */ | ||
304 | csum = udplite_csum_outgoing(sk, skb); | ||
305 | |||
306 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | ||
307 | |||
308 | skb->ip_summed = CHECKSUM_NONE; | ||
309 | goto send; | ||
310 | |||
311 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | ||
312 | |||
313 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); | ||
314 | goto send; | ||
315 | |||
316 | } else /* `normal' UDP */ | ||
317 | csum = udp_csum_outgoing(sk, skb); | ||
318 | |||
319 | /* add protocol-dependent pseudo-header */ | ||
320 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | ||
321 | sk->sk_protocol, csum ); | ||
322 | if (uh->check == 0) | ||
323 | uh->check = CSUM_MANGLED_0; | ||
324 | |||
325 | send: | ||
326 | err = ip_push_pending_frames(sk); | ||
327 | out: | ||
328 | up->len = 0; | ||
329 | up->pending = 0; | ||
330 | if (!err) | ||
331 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); | ||
332 | return err; | ||
333 | } | ||
334 | |||
335 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
336 | size_t len) | ||
337 | { | ||
338 | struct inet_sock *inet = inet_sk(sk); | ||
339 | struct udp_sock *up = udp_sk(sk); | ||
340 | int ulen = len; | ||
341 | struct ipcm_cookie ipc; | ||
342 | struct rtable *rt = NULL; | ||
343 | int free = 0; | ||
344 | int connected = 0; | ||
345 | __be32 daddr, faddr, saddr; | ||
346 | __be16 dport; | ||
347 | u8 tos; | ||
348 | int err, is_udplite = IS_UDPLITE(sk); | ||
349 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | ||
350 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | ||
351 | |||
352 | if (len > 0xFFFF) | ||
353 | return -EMSGSIZE; | ||
354 | |||
355 | /* | ||
356 | * Check the flags. | ||
357 | */ | ||
358 | |||
359 | if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ | ||
360 | return -EOPNOTSUPP; | ||
361 | |||
362 | ipc.opt = NULL; | ||
363 | |||
364 | if (up->pending) { | ||
365 | /* | ||
366 | * There are pending frames. | ||
367 | * The socket lock must be held while it's corked. | ||
368 | */ | ||
369 | lock_sock(sk); | ||
370 | if (likely(up->pending)) { | ||
371 | if (unlikely(up->pending != AF_INET)) { | ||
372 | release_sock(sk); | ||
373 | return -EINVAL; | ||
374 | } | ||
375 | goto do_append_data; | ||
376 | } | ||
377 | release_sock(sk); | ||
378 | } | ||
379 | ulen += sizeof(struct udphdr); | ||
380 | |||
381 | /* | ||
382 | * Get and verify the address. | ||
383 | */ | ||
384 | if (msg->msg_name) { | ||
385 | struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; | ||
386 | if (msg->msg_namelen < sizeof(*usin)) | ||
387 | return -EINVAL; | ||
388 | if (usin->sin_family != AF_INET) { | ||
389 | if (usin->sin_family != AF_UNSPEC) | ||
390 | return -EAFNOSUPPORT; | ||
391 | } | ||
392 | |||
393 | daddr = usin->sin_addr.s_addr; | ||
394 | dport = usin->sin_port; | ||
395 | if (dport == 0) | ||
396 | return -EINVAL; | ||
397 | } else { | ||
398 | if (sk->sk_state != TCP_ESTABLISHED) | ||
399 | return -EDESTADDRREQ; | ||
400 | daddr = inet->daddr; | ||
401 | dport = inet->dport; | ||
402 | /* Open fast path for connected socket. | ||
403 | Route will not be used, if at least one option is set. | ||
404 | */ | ||
405 | connected = 1; | ||
406 | } | ||
407 | ipc.addr = inet->saddr; | ||
408 | |||
409 | ipc.oif = sk->sk_bound_dev_if; | ||
410 | if (msg->msg_controllen) { | ||
411 | err = ip_cmsg_send(msg, &ipc); | ||
412 | if (err) | ||
413 | return err; | ||
414 | if (ipc.opt) | ||
415 | free = 1; | ||
416 | connected = 0; | ||
417 | } | ||
418 | if (!ipc.opt) | ||
419 | ipc.opt = inet->opt; | ||
420 | |||
421 | saddr = ipc.addr; | ||
422 | ipc.addr = faddr = daddr; | ||
423 | |||
424 | if (ipc.opt && ipc.opt->srr) { | ||
425 | if (!daddr) | ||
426 | return -EINVAL; | ||
427 | faddr = ipc.opt->faddr; | ||
428 | connected = 0; | ||
429 | } | ||
430 | tos = RT_TOS(inet->tos); | ||
431 | if (sock_flag(sk, SOCK_LOCALROUTE) || | ||
432 | (msg->msg_flags & MSG_DONTROUTE) || | ||
433 | (ipc.opt && ipc.opt->is_strictroute)) { | ||
434 | tos |= RTO_ONLINK; | ||
435 | connected = 0; | ||
436 | } | ||
437 | |||
438 | if (ipv4_is_multicast(daddr)) { | ||
439 | if (!ipc.oif) | ||
440 | ipc.oif = inet->mc_index; | ||
441 | if (!saddr) | ||
442 | saddr = inet->mc_addr; | ||
443 | connected = 0; | ||
444 | } | ||
445 | |||
446 | if (connected) | ||
447 | rt = (struct rtable*)sk_dst_check(sk, 0); | ||
448 | |||
449 | if (rt == NULL) { | ||
450 | struct flowi fl = { .oif = ipc.oif, | ||
451 | .nl_u = { .ip4_u = | ||
452 | { .daddr = faddr, | ||
453 | .saddr = saddr, | ||
454 | .tos = tos } }, | ||
455 | .proto = sk->sk_protocol, | ||
456 | .uli_u = { .ports = | ||
457 | { .sport = inet->sport, | ||
458 | .dport = dport } } }; | ||
459 | security_sk_classify_flow(sk, &fl); | ||
460 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); | ||
461 | if (err) { | ||
462 | if (err == -ENETUNREACH) | ||
463 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
464 | goto out; | ||
465 | } | ||
466 | |||
467 | err = -EACCES; | ||
468 | if ((rt->rt_flags & RTCF_BROADCAST) && | ||
469 | !sock_flag(sk, SOCK_BROADCAST)) | ||
470 | goto out; | ||
471 | if (connected) | ||
472 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | ||
473 | } | ||
474 | |||
475 | if (msg->msg_flags&MSG_CONFIRM) | ||
476 | goto do_confirm; | ||
477 | back_from_confirm: | ||
478 | |||
479 | saddr = rt->rt_src; | ||
480 | if (!ipc.addr) | ||
481 | daddr = ipc.addr = rt->rt_dst; | ||
482 | |||
483 | lock_sock(sk); | ||
484 | if (unlikely(up->pending)) { | ||
485 | /* The socket is already corked while preparing it. */ | ||
486 | /* ... which is an evident application bug. --ANK */ | ||
487 | release_sock(sk); | ||
488 | |||
489 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); | ||
490 | err = -EINVAL; | ||
491 | goto out; | ||
492 | } | ||
493 | /* | ||
494 | * Now cork the socket to pend data. | ||
495 | */ | ||
496 | inet->cork.fl.fl4_dst = daddr; | ||
497 | inet->cork.fl.fl_ip_dport = dport; | ||
498 | inet->cork.fl.fl4_src = saddr; | ||
499 | inet->cork.fl.fl_ip_sport = inet->sport; | ||
500 | up->pending = AF_INET; | ||
501 | |||
502 | do_append_data: | ||
503 | up->len += ulen; | ||
504 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
505 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | ||
506 | sizeof(struct udphdr), &ipc, rt, | ||
507 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | ||
508 | if (err) | ||
509 | udp_flush_pending_frames(sk); | ||
510 | else if (!corkreq) | ||
511 | err = udp_push_pending_frames(sk); | ||
512 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | ||
513 | up->pending = 0; | ||
514 | release_sock(sk); | ||
515 | |||
516 | out: | ||
517 | ip_rt_put(rt); | ||
518 | if (free) | ||
519 | kfree(ipc.opt); | ||
520 | if (!err) | ||
521 | return len; | ||
522 | /* | ||
523 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | ||
524 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | ||
525 | * we don't have a good statistic (IpOutDiscards but it can be too many | ||
526 | * things). We could add another new stat but at least for now that | ||
527 | * seems like overkill. | ||
528 | */ | ||
529 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
530 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); | ||
531 | } | ||
532 | return err; | ||
533 | |||
534 | do_confirm: | ||
535 | dst_confirm(&rt->u.dst); | ||
536 | if (!(msg->msg_flags&MSG_PROBE) || len) | ||
537 | goto back_from_confirm; | ||
538 | err = 0; | ||
539 | goto out; | ||
540 | } | ||
541 | |||
542 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | ||
543 | size_t size, int flags) | ||
544 | { | ||
545 | struct udp_sock *up = udp_sk(sk); | ||
546 | int ret; | ||
547 | |||
548 | if (!up->pending) { | ||
549 | struct msghdr msg = { .msg_flags = flags|MSG_MORE }; | ||
550 | |||
551 | /* Call udp_sendmsg to specify destination address which | ||
552 | * sendpage interface can't pass. | ||
553 | * This will succeed only when the socket is connected. | ||
554 | */ | ||
555 | ret = udp_sendmsg(NULL, sk, &msg, 0); | ||
556 | if (ret < 0) | ||
557 | return ret; | ||
558 | } | ||
559 | |||
560 | lock_sock(sk); | ||
561 | |||
562 | if (unlikely(!up->pending)) { | ||
563 | release_sock(sk); | ||
564 | |||
565 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); | ||
566 | return -EINVAL; | ||
567 | } | ||
568 | |||
569 | ret = ip_append_page(sk, page, offset, size, flags); | ||
570 | if (ret == -EOPNOTSUPP) { | ||
571 | release_sock(sk); | ||
572 | return sock_no_sendpage(sk->sk_socket, page, offset, | ||
573 | size, flags); | ||
574 | } | ||
575 | if (ret < 0) { | ||
576 | udp_flush_pending_frames(sk); | ||
577 | goto out; | ||
578 | } | ||
579 | |||
580 | up->len += size; | ||
581 | if (!(up->corkflag || (flags&MSG_MORE))) | ||
582 | ret = udp_push_pending_frames(sk); | ||
583 | if (!ret) | ||
584 | ret = size; | ||
585 | out: | ||
586 | release_sock(sk); | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | /* | ||
591 | * This should be easy, if there is something there we | ||
592 | * return it, otherwise we block. | ||
593 | */ | ||
594 | |||
595 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
596 | size_t len, int noblock, int flags, int *addr_len) | ||
597 | { | ||
598 | struct inet_sock *inet = inet_sk(sk); | ||
599 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | ||
600 | struct sk_buff *skb; | ||
601 | unsigned int ulen, copied; | ||
602 | int peeked; | ||
603 | int err; | ||
604 | int is_udplite = IS_UDPLITE(sk); | ||
605 | |||
606 | /* | ||
607 | * Check any passed addresses | ||
608 | */ | ||
609 | if (addr_len) | ||
610 | *addr_len=sizeof(*sin); | ||
611 | |||
612 | if (flags & MSG_ERRQUEUE) | ||
613 | return ip_recv_error(sk, msg, len); | ||
614 | |||
615 | try_again: | ||
616 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | ||
617 | &peeked, &err); | ||
618 | if (!skb) | ||
619 | goto out; | ||
620 | |||
621 | ulen = skb->len - sizeof(struct udphdr); | ||
622 | copied = len; | ||
623 | if (copied > ulen) | ||
624 | copied = ulen; | ||
625 | else if (copied < ulen) | ||
626 | msg->msg_flags |= MSG_TRUNC; | ||
627 | |||
628 | /* | ||
629 | * If checksum is needed at all, try to do it while copying the | ||
630 | * data. If the data is truncated, or if we only want a partial | ||
631 | * coverage checksum (UDP-Lite), do it before the copy. | ||
632 | */ | ||
633 | |||
634 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | ||
635 | if (udp_lib_checksum_complete(skb)) | ||
636 | goto csum_copy_err; | ||
637 | } | ||
638 | |||
639 | if (skb_csum_unnecessary(skb)) | ||
640 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | ||
641 | msg->msg_iov, copied ); | ||
642 | else { | ||
643 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | ||
644 | |||
645 | if (err == -EINVAL) | ||
646 | goto csum_copy_err; | ||
647 | } | ||
648 | |||
649 | if (err) | ||
650 | goto out_free; | ||
651 | |||
652 | if (!peeked) | ||
653 | UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); | ||
654 | |||
655 | sock_recv_timestamp(msg, sk, skb); | ||
656 | |||
657 | /* Copy the address. */ | ||
658 | if (sin) | ||
659 | { | ||
660 | sin->sin_family = AF_INET; | ||
661 | sin->sin_port = udp_hdr(skb)->source; | ||
662 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | ||
663 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
664 | } | ||
665 | if (inet->cmsg_flags) | ||
666 | ip_cmsg_recv(msg, skb); | ||
667 | |||
668 | err = copied; | ||
669 | if (flags & MSG_TRUNC) | ||
670 | err = ulen; | ||
671 | |||
672 | out_free: | ||
673 | lock_sock(sk); | ||
674 | skb_free_datagram(sk, skb); | ||
675 | release_sock(sk); | ||
676 | out: | ||
677 | return err; | ||
678 | |||
679 | csum_copy_err: | ||
680 | lock_sock(sk); | ||
681 | if (!skb_kill_datagram(sk, skb, flags)) | ||
682 | UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); | ||
683 | release_sock(sk); | ||
684 | |||
685 | if (noblock) | ||
686 | return -EAGAIN; | ||
687 | goto try_again; | ||
688 | } | ||
689 | |||
690 | |||
691 | /* returns: | ||
692 | * -1: error | ||
693 | * 0: success | ||
694 | * >0: "udp encap" protocol resubmission | ||
695 | * | ||
696 | * Note that in the success and error cases, the skb is assumed to | ||
697 | * have either been requeued or freed. | ||
698 | */ | ||
699 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | ||
700 | { | ||
701 | struct udp_sock *up = udp_sk(sk); | ||
702 | int rc; | ||
703 | int is_udplite = IS_UDPLITE(sk); | ||
704 | |||
705 | /* | ||
706 | * Charge it to the socket, dropping if the queue is full. | ||
707 | */ | ||
708 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | ||
709 | goto drop; | ||
710 | nf_reset(skb); | ||
711 | |||
712 | if (up->encap_type) { | ||
713 | /* | ||
714 | * This is an encapsulation socket so pass the skb to | ||
715 | * the socket's udp_encap_rcv() hook. Otherwise, just | ||
716 | * fall through and pass this up the UDP socket. | ||
717 | * up->encap_rcv() returns the following value: | ||
718 | * =0 if skb was successfully passed to the encap | ||
719 | * handler or was discarded by it. | ||
720 | * >0 if skb should be passed on to UDP. | ||
721 | * <0 if skb should be resubmitted as proto -N | ||
722 | */ | ||
723 | |||
724 | /* if we're overly short, let UDP handle it */ | ||
725 | if (skb->len > sizeof(struct udphdr) && | ||
726 | up->encap_rcv != NULL) { | ||
727 | int ret; | ||
728 | |||
729 | ret = (*up->encap_rcv)(sk, skb); | ||
730 | if (ret <= 0) { | ||
731 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, | ||
732 | is_udplite); | ||
733 | return -ret; | ||
734 | } | ||
735 | } | ||
736 | |||
737 | /* FALLTHROUGH -- it's a UDP Packet */ | ||
738 | } | ||
739 | |||
740 | /* | ||
741 | * UDP-Lite specific tests, ignored on UDP sockets | ||
742 | */ | ||
743 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { | ||
744 | |||
745 | /* | ||
746 | * MIB statistics other than incrementing the error count are | ||
747 | * disabled for the following two types of errors: these depend | ||
748 | * on the application settings, not on the functioning of the | ||
749 | * protocol stack as such. | ||
750 | * | ||
751 | * RFC 3828 here recommends (sec 3.3): "There should also be a | ||
752 | * way ... to ... at least let the receiving application block | ||
753 | * delivery of packets with coverage values less than a value | ||
754 | * provided by the application." | ||
755 | */ | ||
756 | if (up->pcrlen == 0) { /* full coverage was set */ | ||
757 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | ||
758 | "%d while full coverage %d requested\n", | ||
759 | UDP_SKB_CB(skb)->cscov, skb->len); | ||
760 | goto drop; | ||
761 | } | ||
762 | /* The next case involves violating the min. coverage requested | ||
763 | * by the receiver. This is subtle: if receiver wants x and x is | ||
764 | * greater than the buffersize/MTU then receiver will complain | ||
765 | * that it wants x while sender emits packets of smaller size y. | ||
766 | * Therefore the above ...()->partial_cov statement is essential. | ||
767 | */ | ||
768 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | ||
769 | LIMIT_NETDEBUG(KERN_WARNING | ||
770 | "UDPLITE: coverage %d too small, need min %d\n", | ||
771 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | ||
772 | goto drop; | ||
773 | } | ||
774 | } | ||
775 | |||
776 | if (sk->sk_filter) { | ||
777 | if (udp_lib_checksum_complete(skb)) | ||
778 | goto drop; | ||
779 | } | ||
780 | |||
781 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | ||
782 | /* Note that an ENOMEM error is charged twice */ | ||
783 | if (rc == -ENOMEM) | ||
784 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); | ||
785 | goto drop; | ||
786 | } | ||
787 | |||
788 | return 0; | ||
789 | |||
790 | drop: | ||
791 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); | ||
792 | kfree_skb(skb); | ||
793 | return -1; | ||
794 | } | ||
795 | |||
796 | /* | ||
797 | * Multicasts and broadcasts go to each listener. | ||
798 | * | ||
799 | * Note: called only from the BH handler context, | ||
800 | * so we don't need to lock the hashes. | ||
801 | */ | ||
802 | static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | ||
803 | struct udphdr *uh, | ||
804 | __be32 saddr, __be32 daddr, | ||
805 | struct hlist_head udptable[]) | ||
806 | { | ||
807 | struct sock *sk; | ||
808 | int dif; | ||
809 | |||
810 | read_lock(&udp_hash_lock); | ||
811 | sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); | ||
812 | dif = skb->dev->ifindex; | ||
813 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); | ||
814 | if (sk) { | ||
815 | struct sock *sknext = NULL; | ||
816 | |||
817 | do { | ||
818 | struct sk_buff *skb1 = skb; | ||
819 | |||
820 | sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, | ||
821 | uh->source, saddr, dif); | ||
822 | if (sknext) | ||
823 | skb1 = skb_clone(skb, GFP_ATOMIC); | ||
824 | |||
825 | if (skb1) { | ||
826 | int ret = 0; | ||
827 | |||
828 | bh_lock_sock_nested(sk); | ||
829 | if (!sock_owned_by_user(sk)) | ||
830 | ret = udp_queue_rcv_skb(sk, skb1); | ||
831 | else | ||
832 | sk_add_backlog(sk, skb1); | ||
833 | bh_unlock_sock(sk); | ||
834 | |||
835 | if (ret > 0) | ||
836 | /* we should probably re-process instead | ||
837 | * of dropping packets here. */ | ||
838 | kfree_skb(skb1); | ||
839 | } | ||
840 | sk = sknext; | ||
841 | } while (sknext); | ||
842 | } else | ||
843 | kfree_skb(skb); | ||
844 | read_unlock(&udp_hash_lock); | ||
845 | return 0; | ||
846 | } | ||
847 | |||
848 | /* Initialize UDP checksum. If exited with zero value (success), | ||
849 | * CHECKSUM_UNNECESSARY means, that no more checks are required. | ||
850 | * Otherwise, csum completion requires chacksumming packet body, | ||
851 | * including udp header and folding it to skb->csum. | ||
852 | */ | ||
853 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | ||
854 | int proto) | ||
855 | { | ||
856 | const struct iphdr *iph; | ||
857 | int err; | ||
858 | |||
859 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
860 | UDP_SKB_CB(skb)->cscov = skb->len; | ||
861 | |||
862 | if (IS_PROTO_UDPLITE(proto)) { | ||
863 | err = udplite_checksum_init(skb, uh); | ||
864 | if (err) | ||
865 | return err; | ||
866 | } | ||
867 | |||
868 | iph = ip_hdr(skb); | ||
869 | if (uh->check == 0) { | ||
870 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
871 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
872 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
873 | proto, skb->csum)) | ||
874 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
875 | } | ||
876 | if (!skb_csum_unnecessary(skb)) | ||
877 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
878 | skb->len, proto, 0); | ||
879 | /* Probably, we should checksum udp header (it should be in cache | ||
880 | * in any case) and data in tiny packets (< rx copybreak). | ||
881 | */ | ||
882 | |||
883 | return 0; | ||
884 | } | ||
885 | |||
886 | /* | ||
887 | * All we need to do is get the socket, and then do a checksum. | ||
888 | */ | ||
889 | |||
890 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | ||
891 | int proto) | ||
892 | { | ||
893 | struct sock *sk; | ||
894 | struct udphdr *uh = udp_hdr(skb); | ||
895 | unsigned short ulen; | ||
896 | struct rtable *rt = (struct rtable*)skb->dst; | ||
897 | __be32 saddr = ip_hdr(skb)->saddr; | ||
898 | __be32 daddr = ip_hdr(skb)->daddr; | ||
899 | |||
900 | /* | ||
901 | * Validate the packet. | ||
902 | */ | ||
903 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | ||
904 | goto drop; /* No space for header. */ | ||
905 | |||
906 | ulen = ntohs(uh->len); | ||
907 | if (ulen > skb->len) | ||
908 | goto short_packet; | ||
909 | |||
910 | if (IS_PROTO_UDPLITE(proto)) { | ||
911 | /* UDP validates ulen. */ | ||
912 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | ||
913 | goto short_packet; | ||
914 | uh = udp_hdr(skb); | ||
915 | } | ||
916 | |||
917 | if (udp4_csum_init(skb, uh, proto)) | ||
918 | goto csum_error; | ||
919 | |||
920 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | ||
921 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | ||
922 | |||
923 | sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, | ||
924 | uh->dest, inet_iif(skb), udptable); | ||
925 | |||
926 | if (sk != NULL) { | ||
927 | int ret = 0; | ||
928 | bh_lock_sock_nested(sk); | ||
929 | if (!sock_owned_by_user(sk)) | ||
930 | ret = udp_queue_rcv_skb(sk, skb); | ||
931 | else | ||
932 | sk_add_backlog(sk, skb); | ||
933 | bh_unlock_sock(sk); | ||
934 | sock_put(sk); | ||
935 | |||
936 | /* a return value > 0 means to resubmit the input, but | ||
937 | * it wants the return to be -protocol, or 0 | ||
938 | */ | ||
939 | if (ret > 0) | ||
940 | return -ret; | ||
941 | return 0; | ||
942 | } | ||
943 | |||
944 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
945 | goto drop; | ||
946 | nf_reset(skb); | ||
947 | |||
948 | /* No socket. Drop packet silently, if checksum is wrong */ | ||
949 | if (udp_lib_checksum_complete(skb)) | ||
950 | goto csum_error; | ||
951 | |||
952 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); | ||
953 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
954 | |||
955 | /* | ||
956 | * Hmm. We got an UDP packet to a port to which we | ||
957 | * don't wanna listen. Ignore it. | ||
958 | */ | ||
959 | kfree_skb(skb); | ||
960 | return 0; | ||
961 | |||
962 | short_packet: | ||
963 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | ||
964 | IS_PROTO_UDPLITE(proto) ? "-Lite" : "", | ||
965 | NIPQUAD(saddr), | ||
966 | ntohs(uh->source), | ||
967 | ulen, | ||
968 | skb->len, | ||
969 | NIPQUAD(daddr), | ||
970 | ntohs(uh->dest)); | ||
971 | goto drop; | ||
972 | |||
973 | csum_error: | ||
974 | /* | ||
975 | * RFC1122: OK. Discards the bad packet silently (as far as | ||
976 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | ||
977 | */ | ||
978 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | ||
979 | IS_PROTO_UDPLITE(proto) ? "-Lite" : "", | ||
980 | NIPQUAD(saddr), | ||
981 | ntohs(uh->source), | ||
982 | NIPQUAD(daddr), | ||
983 | ntohs(uh->dest), | ||
984 | ulen); | ||
985 | drop: | ||
986 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); | ||
987 | kfree_skb(skb); | ||
988 | return 0; | ||
989 | } | ||
990 | |||
991 | int udp_rcv(struct sk_buff *skb) | ||
992 | { | ||
993 | return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); | ||
994 | } | ||
995 | |||
/*
 * Socket destroy hook: discard any frames still pending on the socket.
 * The flush runs with the socket lock held.  Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);

	return 0;
}
1003 | |||
1004 | int udp_setsockopt(struct sock *sk, int level, int optname, | ||
1005 | char __user *optval, int optlen) | ||
1006 | { | ||
1007 | if (IS_SOL_UDPFAMILY(level)) | ||
1008 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | ||
1009 | udp_push_pending_frames); | ||
1010 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
1011 | } | ||
1012 | |||
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): UDP-family levels go to
 * udp_lib_setsockopt(), all other levels to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	int err;

	if (IS_SOL_UDPFAMILY(level))
		err = udp_lib_setsockopt(sk, level, optname, optval, optlen,
					 udp_push_pending_frames);
	else
		err = compat_ip_setsockopt(sk, level, optname, optval, optlen);

	return err;
}
#endif
1023 | |||
1024 | int udp_getsockopt(struct sock *sk, int level, int optname, | ||
1025 | char __user *optval, int __user *optlen) | ||
1026 | { | ||
1027 | if (IS_SOL_UDPFAMILY(level)) | ||
1028 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | ||
1029 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
1030 | } | ||
1031 | |||
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): UDP-family levels go to
 * udp_lib_getsockopt(), all other levels to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	int err;

	if (IS_SOL_UDPFAMILY(level))
		err = udp_lib_getsockopt(sk, level, optname, optval, optlen);
	else
		err = compat_ip_getsockopt(sk, level, optname, optval, optlen);

	return err;
}
#endif
1041 | |||
1042 | /* ------------------------------------------------------------------------ */ | ||
1043 | DEFINE_PROTO_INUSE(udp) | ||
1044 | |||
1045 | struct proto udp_prot = { | ||
1046 | .name = "UDP", | ||
1047 | .owner = THIS_MODULE, | ||
1048 | .close = udp_lib_close, | ||
1049 | .connect = ip4_datagram_connect, | ||
1050 | .disconnect = udp_disconnect, | ||
1051 | .ioctl = udp_ioctl, | ||
1052 | .destroy = udp_destroy_sock, | ||
1053 | .setsockopt = udp_setsockopt, | ||
1054 | .getsockopt = udp_getsockopt, | ||
1055 | .sendmsg = udp_sendmsg, | ||
1056 | .recvmsg = udp_recvmsg, | ||
1057 | .sendpage = udp_sendpage, | ||
1058 | .backlog_rcv = udp_queue_rcv_skb, | ||
1059 | .hash = udp_lib_hash, | ||
1060 | .unhash = udp_lib_unhash, | ||
1061 | .get_port = udp_v4_get_port, | ||
1062 | .memory_allocated = &udp_memory_allocated, | ||
1063 | .sysctl_mem = sysctl_udp_mem, | ||
1064 | .sysctl_wmem = &sysctl_udp_wmem_min, | ||
1065 | .sysctl_rmem = &sysctl_udp_rmem_min, | ||
1066 | .obj_size = sizeof(struct udp_sock), | ||
1067 | #ifdef CONFIG_COMPAT | ||
1068 | .compat_setsockopt = compat_udp_setsockopt, | ||
1069 | .compat_getsockopt = compat_udp_getsockopt, | ||
1070 | #endif | ||
1071 | REF_PROTO_INUSE(udp) | ||
1072 | }; | ||
1073 | |||
1074 | /* ------------------------------------------------------------------------ */ | ||
1075 | static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) | ||
1076 | { | ||
1077 | struct inet_sock *inet = inet_sk(sp); | ||
1078 | __be32 dest = inet->daddr; | ||
1079 | __be32 src = inet->rcv_saddr; | ||
1080 | __u16 destp = ntohs(inet->dport); | ||
1081 | __u16 srcp = ntohs(inet->sport); | ||
1082 | |||
1083 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" | ||
1084 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", | ||
1085 | bucket, src, srcp, dest, destp, sp->sk_state, | ||
1086 | atomic_read(&sp->sk_wmem_alloc), | ||
1087 | atomic_read(&sp->sk_rmem_alloc), | ||
1088 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), | ||
1089 | atomic_read(&sp->sk_refcnt), sp); | ||
1090 | } | ||
1091 | |||
1092 | int udp4_seq_show(struct seq_file *seq, void *v) | ||
1093 | { | ||
1094 | if (v == SEQ_START_TOKEN) | ||
1095 | seq_printf(seq, "%-127s\n", | ||
1096 | " sl local_address rem_address st tx_queue " | ||
1097 | "rx_queue tr tm->when retrnsmt uid timeout " | ||
1098 | "inode"); | ||
1099 | else { | ||
1100 | char tmpbuf[129]; | ||
1101 | struct udp_iter_state *state = seq->private; | ||
1102 | |||
1103 | udp4_format_sock(v, tmpbuf, state->bucket); | ||
1104 | seq_printf(seq, "%-127s\n", tmpbuf); | ||
1105 | } | ||
1106 | return 0; | ||
1107 | } | ||
1108 | |||
1109 | /* ------------------------------------------------------------------------ */ | ||
1110 | #ifdef CONFIG_PROC_FS | ||
1111 | static struct file_operations udp4_seq_fops; | ||
1112 | static struct udp_seq_afinfo udp4_seq_afinfo = { | ||
1113 | .owner = THIS_MODULE, | ||
1114 | .name = "udp", | ||
1115 | .family = AF_INET, | ||
1116 | .hashtable = udp_hash, | ||
1117 | .seq_show = udp4_seq_show, | ||
1118 | .seq_fops = &udp4_seq_fops, | ||
1119 | }; | ||
1120 | |||
1121 | int __init udp4_proc_init(void) | ||
1122 | { | ||
1123 | return udp_proc_register(&udp4_seq_afinfo); | ||
1124 | } | ||
1125 | |||
1126 | void udp4_proc_exit(void) | ||
1127 | { | ||
1128 | udp_proc_unregister(&udp4_seq_afinfo); | ||
1129 | } | ||
1130 | #endif /* CONFIG_PROC_FS */ | ||
1131 | |||
1132 | EXPORT_SYMBOL(udp_prot); | ||
1133 | EXPORT_SYMBOL(udp_sendmsg); | ||
1134 | |||
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite_ipv4.c index 001b881ca36f..001b881ca36f 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite_ipv4.c | |||