about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Kconfig 10
-rw-r--r-- net/ipv4/Makefile 3
-rw-r--r-- net/ipv4/af_inet.c 19
-rw-r--r-- net/ipv4/arp.c 9
-rw-r--r-- net/ipv4/devinet.c 21
-rw-r--r-- net/ipv4/icmp.c 101
-rw-r--r-- net/ipv4/igmp.c 39
-rw-r--r-- net/ipv4/inet_connection_sock.c 2
-rw-r--r-- net/ipv4/ip_options.c 5
-rw-r--r-- net/ipv4/ipconfig.c 7
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 5
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 14
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2
-rw-r--r-- net/ipv4/proc.c 5
-rw-r--r-- net/ipv4/route.c 75
-rw-r--r-- net/ipv4/syncookies.c 11
-rw-r--r-- net/ipv4/tcp_cubic.c 35
-rw-r--r-- net/ipv4/tcp_input.c 1
-rw-r--r-- net/ipv4/tcp_ipv4.c 30
-rw-r--r-- net/ipv4/tcp_minisocks.c 4
-rw-r--r-- net/ipv4/tcp_output.c 1
-rw-r--r-- net/ipv4/udp.c 1090
-rw-r--r-- net/ipv4/udp_ipv4.c 1134
-rw-r--r-- net/ipv4/udplite_ipv4.c (renamed from net/ipv4/udplite.c) 0
24 files changed, 1381 insertions, 1242 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9c7e5ffb223d..5098fd2ff4d0 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -632,5 +632,15 @@ config TCP_MD5SIG
632 632
633 If unsure, say N. 633 If unsure, say N.
634 634
635config IP_UDPLITE
636 bool "IP: UDP-Lite Protocol (RFC 3828)"
637 default n
638 ---help---
639 UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length
640 checksum. Read <file:Documentation/networking/udplite.txt> for
641 details.
642
643 If unsure, say N.
644
635source "net/ipv4/ipvs/Kconfig" 645source "net/ipv4/ipvs/Kconfig"
636 646
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ad40ef3f9ebc..d5226241d5ed 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \
8 inet_timewait_sock.o inet_connection_sock.o \ 8 inet_timewait_sock.o inet_connection_sock.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udp_ipv4.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o \ 13 fib_frontend.o fib_semantics.o \
14 inet_fragment.o 14 inet_fragment.o
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
50obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o 50obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
51obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o 51obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
52obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o
52obj-$(CONFIG_NETLABEL) += cipso_ipv4.o 53obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
53 54
54obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 55obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 09ca5293d08f..67260c0eaaa8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -784,6 +784,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
784{ 784{
785 struct sock *sk = sock->sk; 785 struct sock *sk = sock->sk;
786 int err = 0; 786 int err = 0;
787 struct net *net = sk->sk_net;
787 788
788 switch (cmd) { 789 switch (cmd) {
789 case SIOCGSTAMP: 790 case SIOCGSTAMP:
@@ -795,12 +796,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
795 case SIOCADDRT: 796 case SIOCADDRT:
796 case SIOCDELRT: 797 case SIOCDELRT:
797 case SIOCRTMSG: 798 case SIOCRTMSG:
798 err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg); 799 err = ip_rt_ioctl(net, cmd, (void __user *)arg);
799 break; 800 break;
800 case SIOCDARP: 801 case SIOCDARP:
801 case SIOCGARP: 802 case SIOCGARP:
802 case SIOCSARP: 803 case SIOCSARP:
803 err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg); 804 err = arp_ioctl(net, cmd, (void __user *)arg);
804 break; 805 break;
805 case SIOCGIFADDR: 806 case SIOCGIFADDR:
806 case SIOCSIFADDR: 807 case SIOCSIFADDR:
@@ -813,7 +814,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
813 case SIOCSIFPFLAGS: 814 case SIOCSIFPFLAGS:
814 case SIOCGIFPFLAGS: 815 case SIOCGIFPFLAGS:
815 case SIOCSIFFLAGS: 816 case SIOCSIFFLAGS:
816 err = devinet_ioctl(cmd, (void __user *)arg); 817 err = devinet_ioctl(net, cmd, (void __user *)arg);
817 break; 818 break;
818 default: 819 default:
819 if (sk->sk_prot->ioctl) 820 if (sk->sk_prot->ioctl)
@@ -1316,15 +1317,18 @@ static int __init init_ipv4_mibs(void)
1316 if (snmp_mib_init((void **)udp_statistics, 1317 if (snmp_mib_init((void **)udp_statistics,
1317 sizeof(struct udp_mib)) < 0) 1318 sizeof(struct udp_mib)) < 0)
1318 goto err_udp_mib; 1319 goto err_udp_mib;
1320#ifdef CONFIG_IP_UDPLITE
1319 if (snmp_mib_init((void **)udplite_statistics, 1321 if (snmp_mib_init((void **)udplite_statistics,
1320 sizeof(struct udp_mib)) < 0) 1322 sizeof(struct udp_mib)) < 0)
1321 goto err_udplite_mib; 1323 goto err_udplite_mib;
1322 1324#endif
1323 tcp_mib_init(); 1325 tcp_mib_init();
1324 1326
1325 return 0; 1327 return 0;
1326 1328
1329#ifdef CONFIG_IP_UDPLITE
1327err_udplite_mib: 1330err_udplite_mib:
1331#endif
1328 snmp_mib_free((void **)udp_statistics); 1332 snmp_mib_free((void **)udp_statistics);
1329err_udp_mib: 1333err_udp_mib:
1330 snmp_mib_free((void **)tcp_statistics); 1334 snmp_mib_free((void **)tcp_statistics);
@@ -1414,7 +1418,7 @@ static int __init inet_init(void)
1414 1418
1415 ip_init(); 1419 ip_init();
1416 1420
1417 tcp_v4_init(&inet_family_ops); 1421 tcp_v4_init();
1418 1422
1419 /* Setup TCP slab cache for open requests. */ 1423 /* Setup TCP slab cache for open requests. */
1420 tcp_init(); 1424 tcp_init();
@@ -1422,14 +1426,17 @@ static int __init inet_init(void)
1422 /* Setup UDP memory threshold */ 1426 /* Setup UDP memory threshold */
1423 udp_init(); 1427 udp_init();
1424 1428
1429#ifdef CONFIG_IP_UDPLITE
1425 /* Add UDP-Lite (RFC 3828) */ 1430 /* Add UDP-Lite (RFC 3828) */
1426 udplite4_register(); 1431 udplite4_register();
1432#endif
1427 1433
1428 /* 1434 /*
1429 * Set the ICMP layer up 1435 * Set the ICMP layer up
1430 */ 1436 */
1431 1437
1432 icmp_init(&inet_family_ops); 1438 if (icmp_init() < 0)
1439 panic("Failed to create the ICMP control socket.\n");
1433 1440
1434 /* 1441 /*
1435 * Initialise the multicast router 1442 * Initialise the multicast router
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8e17f65f4002..69e80bd9774a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
570 * Allocate a buffer 570 * Allocate a buffer
571 */ 571 */
572 572
573 skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) 573 skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
574 + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
575 if (skb == NULL) 574 if (skb == NULL)
576 return NULL; 575 return NULL;
577 576
578 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 577 skb_reserve(skb, LL_RESERVED_SPACE(dev));
579 skb_reset_network_header(skb); 578 skb_reset_network_header(skb);
580 arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); 579 arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
581 skb->dev = dev; 580 skb->dev = dev;
582 skb->protocol = htons(ETH_P_ARP); 581 skb->protocol = htons(ETH_P_ARP);
583 if (src_hw == NULL) 582 if (src_hw == NULL)
@@ -916,9 +915,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
916 goto freeskb; 915 goto freeskb;
917 916
918 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 917 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
919 if (!pskb_may_pull(skb, (sizeof(struct arphdr) + 918 if (!pskb_may_pull(skb, arp_hdr_len(dev)))
920 (2 * dev->addr_len) +
921 (2 * sizeof(u32)))))
922 goto freeskb; 919 goto freeskb;
923 920
924 arp = arp_hdr(skb); 921 arp = arp_hdr(skb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 87490f7bb0f7..4a10dbbbe0a1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
446 446
447 ASSERT_RTNL(); 447 ASSERT_RTNL();
448 448
449 if (net != &init_net)
450 return -EINVAL;
451
452 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 449 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 if (err < 0) 450 if (err < 0)
454 goto errout; 451 goto errout;
@@ -560,9 +557,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
560 557
561 ASSERT_RTNL(); 558 ASSERT_RTNL();
562 559
563 if (net != &init_net)
564 return -EINVAL;
565
566 ifa = rtm_to_ifaddr(net, nlh); 560 ifa = rtm_to_ifaddr(net, nlh);
567 if (IS_ERR(ifa)) 561 if (IS_ERR(ifa))
568 return PTR_ERR(ifa); 562 return PTR_ERR(ifa);
@@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr)
595} 589}
596 590
597 591
598int devinet_ioctl(unsigned int cmd, void __user *arg) 592int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
599{ 593{
600 struct ifreq ifr; 594 struct ifreq ifr;
601 struct sockaddr_in sin_orig; 595 struct sockaddr_in sin_orig;
@@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
624 *colon = 0; 618 *colon = 0;
625 619
626#ifdef CONFIG_KMOD 620#ifdef CONFIG_KMOD
627 dev_load(&init_net, ifr.ifr_name); 621 dev_load(net, ifr.ifr_name);
628#endif 622#endif
629 623
630 switch (cmd) { 624 switch (cmd) {
@@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
665 rtnl_lock(); 659 rtnl_lock();
666 660
667 ret = -ENODEV; 661 ret = -ENODEV;
668 if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) 662 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
669 goto done; 663 goto done;
670 664
671 if (colon) 665 if (colon)
@@ -878,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
878{ 872{
879 __be32 addr = 0; 873 __be32 addr = 0;
880 struct in_device *in_dev; 874 struct in_device *in_dev;
875 struct net *net = dev->nd_net;
881 876
882 rcu_read_lock(); 877 rcu_read_lock();
883 in_dev = __in_dev_get_rcu(dev); 878 in_dev = __in_dev_get_rcu(dev);
@@ -906,7 +901,7 @@ no_in_dev:
906 */ 901 */
907 read_lock(&dev_base_lock); 902 read_lock(&dev_base_lock);
908 rcu_read_lock(); 903 rcu_read_lock();
909 for_each_netdev(&init_net, dev) { 904 for_each_netdev(net, dev) {
910 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) 905 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
911 continue; 906 continue;
912 907
@@ -1045,9 +1040,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1045 struct net_device *dev = ptr; 1040 struct net_device *dev = ptr;
1046 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1041 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1047 1042
1048 if (dev->nd_net != &init_net)
1049 return NOTIFY_DONE;
1050
1051 ASSERT_RTNL(); 1043 ASSERT_RTNL();
1052 1044
1053 if (!in_dev) { 1045 if (!in_dev) {
@@ -1173,9 +1165,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1173 struct in_ifaddr *ifa; 1165 struct in_ifaddr *ifa;
1174 int s_ip_idx, s_idx = cb->args[0]; 1166 int s_ip_idx, s_idx = cb->args[0];
1175 1167
1176 if (net != &init_net)
1177 return 0;
1178
1179 s_ip_idx = ip_idx = cb->args[1]; 1168 s_ip_idx = ip_idx = cb->args[1];
1180 idx = 0; 1169 idx = 0;
1181 for_each_netdev(net, dev) { 1170 for_each_netdev(net, dev) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a13c074dac09..cee77d606fbe 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -229,14 +229,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
229 * 229 *
230 * On SMP we have one ICMP socket per-cpu. 230 * On SMP we have one ICMP socket per-cpu.
231 */ 231 */
232static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; 232static struct sock *icmp_sk(struct net *net)
233#define icmp_socket __get_cpu_var(__icmp_socket) 233{
234 return net->ipv4.icmp_sk[smp_processor_id()];
235}
234 236
235static inline int icmp_xmit_lock(void) 237static inline int icmp_xmit_lock(struct sock *sk)
236{ 238{
237 local_bh_disable(); 239 local_bh_disable();
238 240
239 if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { 241 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
240 /* This can happen if the output path signals a 242 /* This can happen if the output path signals a
241 * dst_link_failure() for an outgoing ICMP packet. 243 * dst_link_failure() for an outgoing ICMP packet.
242 */ 244 */
@@ -246,9 +248,9 @@ static inline int icmp_xmit_lock(void)
246 return 0; 248 return 0;
247} 249}
248 250
249static inline void icmp_xmit_unlock(void) 251static inline void icmp_xmit_unlock(struct sock *sk)
250{ 252{
251 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); 253 spin_unlock_bh(&sk->sk_lock.slock);
252} 254}
253 255
254/* 256/*
@@ -346,19 +348,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
346static void icmp_push_reply(struct icmp_bxm *icmp_param, 348static void icmp_push_reply(struct icmp_bxm *icmp_param,
347 struct ipcm_cookie *ipc, struct rtable *rt) 349 struct ipcm_cookie *ipc, struct rtable *rt)
348{ 350{
351 struct sock *sk;
349 struct sk_buff *skb; 352 struct sk_buff *skb;
350 353
351 if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, 354 sk = icmp_sk(rt->u.dst.dev->nd_net);
355 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
352 icmp_param->data_len+icmp_param->head_len, 356 icmp_param->data_len+icmp_param->head_len,
353 icmp_param->head_len, 357 icmp_param->head_len,
354 ipc, rt, MSG_DONTWAIT) < 0) 358 ipc, rt, MSG_DONTWAIT) < 0)
355 ip_flush_pending_frames(icmp_socket->sk); 359 ip_flush_pending_frames(sk);
356 else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { 360 else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
357 struct icmphdr *icmph = icmp_hdr(skb); 361 struct icmphdr *icmph = icmp_hdr(skb);
358 __wsum csum = 0; 362 __wsum csum = 0;
359 struct sk_buff *skb1; 363 struct sk_buff *skb1;
360 364
361 skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { 365 skb_queue_walk(&sk->sk_write_queue, skb1) {
362 csum = csum_add(csum, skb1->csum); 366 csum = csum_add(csum, skb1->csum);
363 } 367 }
364 csum = csum_partial_copy_nocheck((void *)&icmp_param->data, 368 csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
@@ -366,7 +370,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
366 icmp_param->head_len, csum); 370 icmp_param->head_len, csum);
367 icmph->checksum = csum_fold(csum); 371 icmph->checksum = csum_fold(csum);
368 skb->ip_summed = CHECKSUM_NONE; 372 skb->ip_summed = CHECKSUM_NONE;
369 ip_push_pending_frames(icmp_socket->sk); 373 ip_push_pending_frames(sk);
370 } 374 }
371} 375}
372 376
@@ -376,16 +380,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
376 380
377static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) 381static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
378{ 382{
379 struct sock *sk = icmp_socket->sk;
380 struct inet_sock *inet = inet_sk(sk);
381 struct ipcm_cookie ipc; 383 struct ipcm_cookie ipc;
382 struct rtable *rt = (struct rtable *)skb->dst; 384 struct rtable *rt = (struct rtable *)skb->dst;
385 struct net *net = rt->u.dst.dev->nd_net;
386 struct sock *sk = icmp_sk(net);
387 struct inet_sock *inet = inet_sk(sk);
383 __be32 daddr; 388 __be32 daddr;
384 389
385 if (ip_options_echo(&icmp_param->replyopts, skb)) 390 if (ip_options_echo(&icmp_param->replyopts, skb))
386 return; 391 return;
387 392
388 if (icmp_xmit_lock()) 393 if (icmp_xmit_lock(sk))
389 return; 394 return;
390 395
391 icmp_param->data.icmph.checksum = 0; 396 icmp_param->data.icmph.checksum = 0;
@@ -405,7 +410,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
405 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 410 .tos = RT_TOS(ip_hdr(skb)->tos) } },
406 .proto = IPPROTO_ICMP }; 411 .proto = IPPROTO_ICMP };
407 security_skb_classify_flow(skb, &fl); 412 security_skb_classify_flow(skb, &fl);
408 if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl)) 413 if (ip_route_output_key(net, &rt, &fl))
409 goto out_unlock; 414 goto out_unlock;
410 } 415 }
411 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 416 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
@@ -413,7 +418,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
413 icmp_push_reply(icmp_param, &ipc, rt); 418 icmp_push_reply(icmp_param, &ipc, rt);
414 ip_rt_put(rt); 419 ip_rt_put(rt);
415out_unlock: 420out_unlock:
416 icmp_xmit_unlock(); 421 icmp_xmit_unlock(sk);
417} 422}
418 423
419 424
@@ -438,10 +443,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
438 __be32 saddr; 443 __be32 saddr;
439 u8 tos; 444 u8 tos;
440 struct net *net; 445 struct net *net;
446 struct sock *sk;
441 447
442 if (!rt) 448 if (!rt)
443 goto out; 449 goto out;
444 net = rt->u.dst.dev->nd_net; 450 net = rt->u.dst.dev->nd_net;
451 sk = icmp_sk(net);
445 452
446 /* 453 /*
447 * Find the original header. It is expected to be valid, of course. 454 * Find the original header. It is expected to be valid, of course.
@@ -505,7 +512,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
505 } 512 }
506 } 513 }
507 514
508 if (icmp_xmit_lock()) 515 if (icmp_xmit_lock(sk))
509 return; 516 return;
510 517
511 /* 518 /*
@@ -544,7 +551,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
544 icmp_param.data.icmph.checksum = 0; 551 icmp_param.data.icmph.checksum = 0;
545 icmp_param.skb = skb_in; 552 icmp_param.skb = skb_in;
546 icmp_param.offset = skb_network_offset(skb_in); 553 icmp_param.offset = skb_network_offset(skb_in);
547 inet_sk(icmp_socket->sk)->tos = tos; 554 inet_sk(sk)->tos = tos;
548 ipc.addr = iph->saddr; 555 ipc.addr = iph->saddr;
549 ipc.opt = &icmp_param.replyopts; 556 ipc.opt = &icmp_param.replyopts;
550 557
@@ -652,7 +659,7 @@ route_done:
652ende: 659ende:
653 ip_rt_put(rt); 660 ip_rt_put(rt);
654out_unlock: 661out_unlock:
655 icmp_xmit_unlock(); 662 icmp_xmit_unlock(sk);
656out:; 663out:;
657} 664}
658 665
@@ -1139,29 +1146,46 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1139 }, 1146 },
1140}; 1147};
1141 1148
1142void __init icmp_init(struct net_proto_family *ops) 1149static void __net_exit icmp_sk_exit(struct net *net)
1143{ 1150{
1144 struct inet_sock *inet;
1145 int i; 1151 int i;
1146 1152
1147 for_each_possible_cpu(i) { 1153 for_each_possible_cpu(i)
1148 int err; 1154 sk_release_kernel(net->ipv4.icmp_sk[i]);
1155 kfree(net->ipv4.icmp_sk);
1156 net->ipv4.icmp_sk = NULL;
1157}
1149 1158
1150 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, 1159int __net_init icmp_sk_init(struct net *net)
1151 &per_cpu(__icmp_socket, i)); 1160{
1161 int i, err;
1152 1162
1163 net->ipv4.icmp_sk =
1164 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
1165 if (net->ipv4.icmp_sk == NULL)
1166 return -ENOMEM;
1167
1168 for_each_possible_cpu(i) {
1169 struct sock *sk;
1170 struct socket *sock;
1171 struct inet_sock *inet;
1172
1173 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock);
1153 if (err < 0) 1174 if (err < 0)
1154 panic("Failed to create the ICMP control socket.\n"); 1175 goto fail;
1176
1177 net->ipv4.icmp_sk[i] = sk = sock->sk;
1178 sk_change_net(sk, net);
1155 1179
1156 per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; 1180 sk->sk_allocation = GFP_ATOMIC;
1157 1181
1158 /* Enough space for 2 64K ICMP packets, including 1182 /* Enough space for 2 64K ICMP packets, including
1159 * sk_buff struct overhead. 1183 * sk_buff struct overhead.
1160 */ 1184 */
1161 per_cpu(__icmp_socket, i)->sk->sk_sndbuf = 1185 sk->sk_sndbuf =
1162 (2 * ((64 * 1024) + sizeof(struct sk_buff))); 1186 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
1163 1187
1164 inet = inet_sk(per_cpu(__icmp_socket, i)->sk); 1188 inet = inet_sk(sk);
1165 inet->uc_ttl = -1; 1189 inet->uc_ttl = -1;
1166 inet->pmtudisc = IP_PMTUDISC_DONT; 1190 inet->pmtudisc = IP_PMTUDISC_DONT;
1167 1191
@@ -1169,8 +1193,25 @@ void __init icmp_init(struct net_proto_family *ops)
1169 * see it, we do not wish this socket to see incoming 1193 * see it, we do not wish this socket to see incoming
1170 * packets. 1194 * packets.
1171 */ 1195 */
1172 per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); 1196 sk->sk_prot->unhash(sk);
1173 } 1197 }
1198 return 0;
1199
1200fail:
1201 for_each_possible_cpu(i)
1202 sk_release_kernel(net->ipv4.icmp_sk[i]);
1203 kfree(net->ipv4.icmp_sk);
1204 return err;
1205}
1206
1207static struct pernet_operations __net_initdata icmp_sk_ops = {
1208 .init = icmp_sk_init,
1209 .exit = icmp_sk_exit,
1210};
1211
1212int __init icmp_init(void)
1213{
1214 return register_pernet_device(&icmp_sk_ops);
1174} 1215}
1175 1216
1176EXPORT_SYMBOL(icmp_err_convert); 1217EXPORT_SYMBOL(icmp_err_convert);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 732cd07e6071..d3f34a772f3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1198 1198
1199 ASSERT_RTNL(); 1199 ASSERT_RTNL();
1200 1200
1201 if (in_dev->dev->nd_net != &init_net)
1202 return;
1203
1201 for (im=in_dev->mc_list; im; im=im->next) { 1204 for (im=in_dev->mc_list; im; im=im->next) {
1202 if (im->multiaddr == addr) { 1205 if (im->multiaddr == addr) {
1203 im->users++; 1206 im->users++;
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1277 1280
1278 ASSERT_RTNL(); 1281 ASSERT_RTNL();
1279 1282
1283 if (in_dev->dev->nd_net != &init_net)
1284 return;
1285
1280 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { 1286 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
1281 if (i->multiaddr==addr) { 1287 if (i->multiaddr==addr) {
1282 if (--i->users == 0) { 1288 if (--i->users == 0) {
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev)
1304 1310
1305 ASSERT_RTNL(); 1311 ASSERT_RTNL();
1306 1312
1313 if (in_dev->dev->nd_net != &init_net)
1314 return;
1315
1307 for (i=in_dev->mc_list; i; i=i->next) 1316 for (i=in_dev->mc_list; i; i=i->next)
1308 igmp_group_dropped(i); 1317 igmp_group_dropped(i);
1309 1318
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
1324{ 1333{
1325 ASSERT_RTNL(); 1334 ASSERT_RTNL();
1326 1335
1336 if (in_dev->dev->nd_net != &init_net)
1337 return;
1338
1327 in_dev->mc_tomb = NULL; 1339 in_dev->mc_tomb = NULL;
1328#ifdef CONFIG_IP_MULTICAST 1340#ifdef CONFIG_IP_MULTICAST
1329 in_dev->mr_gq_running = 0; 1341 in_dev->mr_gq_running = 0;
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev)
1347 1359
1348 ASSERT_RTNL(); 1360 ASSERT_RTNL();
1349 1361
1362 if (in_dev->dev->nd_net != &init_net)
1363 return;
1364
1350 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1365 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
1351 1366
1352 for (i=in_dev->mc_list; i; i=i->next) 1367 for (i=in_dev->mc_list; i; i=i->next)
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1363 1378
1364 ASSERT_RTNL(); 1379 ASSERT_RTNL();
1365 1380
1381 if (in_dev->dev->nd_net != &init_net)
1382 return;
1383
1366 /* Deactivate timers */ 1384 /* Deactivate timers */
1367 ip_mc_down(in_dev); 1385 ip_mc_down(in_dev);
1368 1386
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1744 if (!ipv4_is_multicast(addr)) 1762 if (!ipv4_is_multicast(addr))
1745 return -EINVAL; 1763 return -EINVAL;
1746 1764
1765 if (sk->sk_net != &init_net)
1766 return -EPROTONOSUPPORT;
1767
1747 rtnl_lock(); 1768 rtnl_lock();
1748 1769
1749 in_dev = ip_mc_find_dev(imr); 1770 in_dev = ip_mc_find_dev(imr);
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1812 u32 ifindex; 1833 u32 ifindex;
1813 int ret = -EADDRNOTAVAIL; 1834 int ret = -EADDRNOTAVAIL;
1814 1835
1836 if (sk->sk_net != &init_net)
1837 return -EPROTONOSUPPORT;
1838
1815 rtnl_lock(); 1839 rtnl_lock();
1816 in_dev = ip_mc_find_dev(imr); 1840 in_dev = ip_mc_find_dev(imr);
1817 ifindex = imr->imr_ifindex; 1841 ifindex = imr->imr_ifindex;
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1857 if (!ipv4_is_multicast(addr)) 1881 if (!ipv4_is_multicast(addr))
1858 return -EINVAL; 1882 return -EINVAL;
1859 1883
1884 if (sk->sk_net != &init_net)
1885 return -EPROTONOSUPPORT;
1886
1860 rtnl_lock(); 1887 rtnl_lock();
1861 1888
1862 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; 1889 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1990 msf->imsf_fmode != MCAST_EXCLUDE) 2017 msf->imsf_fmode != MCAST_EXCLUDE)
1991 return -EINVAL; 2018 return -EINVAL;
1992 2019
2020 if (sk->sk_net != &init_net)
2021 return -EPROTONOSUPPORT;
2022
1993 rtnl_lock(); 2023 rtnl_lock();
1994 2024
1995 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2025 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2070 if (!ipv4_is_multicast(addr)) 2100 if (!ipv4_is_multicast(addr))
2071 return -EINVAL; 2101 return -EINVAL;
2072 2102
2103 if (sk->sk_net != &init_net)
2104 return -EPROTONOSUPPORT;
2105
2073 rtnl_lock(); 2106 rtnl_lock();
2074 2107
2075 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2108 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2132 if (!ipv4_is_multicast(addr)) 2165 if (!ipv4_is_multicast(addr))
2133 return -EINVAL; 2166 return -EINVAL;
2134 2167
2168 if (sk->sk_net != &init_net)
2169 return -EPROTONOSUPPORT;
2170
2135 rtnl_lock(); 2171 rtnl_lock();
2136 2172
2137 err = -EADDRNOTAVAIL; 2173 err = -EADDRNOTAVAIL;
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk)
2216 if (inet->mc_list == NULL) 2252 if (inet->mc_list == NULL)
2217 return; 2253 return;
2218 2254
2255 if (sk->sk_net != &init_net)
2256 return;
2257
2219 rtnl_lock(); 2258 rtnl_lock();
2220 while ((iml = inet->mc_list) != NULL) { 2259 while ((iml = inet->mc_list) != NULL) {
2221 struct in_device *in_dev; 2260 struct in_device *in_dev;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b189278c7bc1..c0e0fa03fce1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -463,7 +463,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
463 if (time_after_eq(now, req->expires)) { 463 if (time_after_eq(now, req->expires)) {
464 if ((req->retrans < thresh || 464 if ((req->retrans < thresh ||
465 (inet_rsk(req)->acked && req->retrans < max_retries)) 465 (inet_rsk(req)->acked && req->retrans < max_retries))
466 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { 466 && !req->rsk_ops->rtx_syn_ack(parent, req)) {
467 unsigned long timeo; 467 unsigned long timeo;
468 468
469 if (req->retrans++ == 0) 469 if (req->retrans++ == 0)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d315158fd3c..baaedd9689a0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -107,10 +107,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
107 sptr = skb_network_header(skb); 107 sptr = skb_network_header(skb);
108 dptr = dopt->__data; 108 dptr = dopt->__data;
109 109
110 if (skb->dst) 110 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
111 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
112 else
113 daddr = ip_hdr(skb)->daddr;
114 111
115 if (sopt->rr) { 112 if (sopt->rr) {
116 optlen = sptr[sopt->rr+1]; 113 optlen = sptr[sopt->rr+1];
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 5dd938579eeb..4afce0572806 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -291,7 +291,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
291 291
292 mm_segment_t oldfs = get_fs(); 292 mm_segment_t oldfs = get_fs();
293 set_fs(get_ds()); 293 set_fs(get_ds());
294 res = devinet_ioctl(cmd, (struct ifreq __user *) arg); 294 res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
295 set_fs(oldfs); 295 set_fs(oldfs);
296 return res; 296 return res;
297} 297}
@@ -459,10 +459,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
459 if (rarp->ar_pro != htons(ETH_P_IP)) 459 if (rarp->ar_pro != htons(ETH_P_IP))
460 goto drop; 460 goto drop;
461 461
462 if (!pskb_may_pull(skb, 462 if (!pskb_may_pull(skb, arp_hdr_len(dev)))
463 sizeof(struct arphdr) +
464 (2 * dev->addr_len) +
465 (2 * 4)))
466 goto drop; 463 goto drop;
467 464
468 /* OK, it is all there and looks valid, process... */ 465 /* OK, it is all there and looks valid, process... */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a7591ce344d2..9b5904486184 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -233,10 +233,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
233 void *table_base; 233 void *table_base;
234 struct xt_table_info *private; 234 struct xt_table_info *private;
235 235
236 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 236 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
237 if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
238 (2 * skb->dev->addr_len) +
239 (2 * sizeof(u32)))))
240 return NF_DROP; 237 return NF_DROP;
241 238
242 indev = in ? in->name : nulldevname; 239 indev = in ? in->name : nulldevname;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d80fee8327e4..313b3fcf387e 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this,
139 unsigned long event, 139 unsigned long event,
140 void *ptr) 140 void *ptr)
141{ 141{
142 const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; 142 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
143 143 return masq_device_event(this, event, dev);
144 if (event == NETDEV_DOWN) {
145 /* IP address was deleted. Search entire table for
146 conntracks which were associated with that device,
147 and forget them. */
148 NF_CT_ASSERT(dev->ifindex != 0);
149
150 nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
151 }
152
153 return NOTIFY_DONE;
154} 144}
155 145
156static struct notifier_block masq_dev_notifier = { 146static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 089252e82c01..9668c3a23efe 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -379,7 +379,7 @@ static const struct file_operations ct_cpu_seq_fops = {
379 .open = ct_cpu_seq_open, 379 .open = ct_cpu_seq_open,
380 .read = seq_read, 380 .read = seq_read,
381 .llseek = seq_lseek, 381 .llseek = seq_lseek,
382 .release = seq_release_private, 382 .release = seq_release,
383}; 383};
384 384
385int __init nf_conntrack_ipv4_compat_init(void) 385int __init nf_conntrack_ipv4_compat_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d63474c6b400..d75ddb7fa4b8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,7 +59,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
59 atomic_read(&tcp_memory_allocated)); 59 atomic_read(&tcp_memory_allocated));
60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), 60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
61 atomic_read(&udp_memory_allocated)); 61 atomic_read(&udp_memory_allocated));
62#ifdef CONFIG_IP_UDPLITE
62 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); 63 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
64#endif
63 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); 65 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
64 seq_printf(seq, "FRAG: inuse %d memory %d\n", 66 seq_printf(seq, "FRAG: inuse %d memory %d\n",
65 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); 67 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
@@ -349,6 +351,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
349 snmp_fold_field((void **)udp_statistics, 351 snmp_fold_field((void **)udp_statistics,
350 snmp4_udp_list[i].entry)); 352 snmp4_udp_list[i].entry));
351 353
354#ifdef CONFIG_IP_UDPLITE
352 /* the UDP and UDP-Lite MIBs are the same */ 355 /* the UDP and UDP-Lite MIBs are the same */
353 seq_puts(seq, "\nUdpLite:"); 356 seq_puts(seq, "\nUdpLite:");
354 for (i = 0; snmp4_udp_list[i].name != NULL; i++) 357 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
@@ -359,7 +362,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
359 seq_printf(seq, " %lu", 362 seq_printf(seq, " %lu",
360 snmp_fold_field((void **)udplite_statistics, 363 snmp_fold_field((void **)udplite_statistics,
361 snmp4_udp_list[i].entry)); 364 snmp4_udp_list[i].entry));
362 365#endif
363 seq_putc(seq, '\n'); 366 seq_putc(seq, '\n');
364 return 0; 367 return 0;
365} 368}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7b5e8e1d94be..8c3e165f0034 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -273,6 +273,7 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
273 273
274#ifdef CONFIG_PROC_FS 274#ifdef CONFIG_PROC_FS
275struct rt_cache_iter_state { 275struct rt_cache_iter_state {
276 struct seq_net_private p;
276 int bucket; 277 int bucket;
277 int genid; 278 int genid;
278}; 279};
@@ -285,7 +286,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
285 rcu_read_lock_bh(); 286 rcu_read_lock_bh();
286 r = rcu_dereference(rt_hash_table[st->bucket].chain); 287 r = rcu_dereference(rt_hash_table[st->bucket].chain);
287 while (r) { 288 while (r) {
288 if (r->rt_genid == st->genid) 289 if (r->u.dst.dev->nd_net == st->p.net &&
290 r->rt_genid == st->genid)
289 return r; 291 return r;
290 r = rcu_dereference(r->u.dst.rt_next); 292 r = rcu_dereference(r->u.dst.rt_next);
291 } 293 }
@@ -294,7 +296,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
294 return r; 296 return r;
295} 297}
296 298
297static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) 299static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st,
300 struct rtable *r)
298{ 301{
299 r = r->u.dst.rt_next; 302 r = r->u.dst.rt_next;
300 while (!r) { 303 while (!r) {
@@ -307,16 +310,25 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r
307 return rcu_dereference(r); 310 return rcu_dereference(r);
308} 311}
309 312
313static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st,
314 struct rtable *r)
315{
316 while ((r = __rt_cache_get_next(st, r)) != NULL) {
317 if (r->u.dst.dev->nd_net != st->p.net)
318 continue;
319 if (r->rt_genid == st->genid)
320 break;
321 }
322 return r;
323}
324
310static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) 325static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
311{ 326{
312 struct rtable *r = rt_cache_get_first(st); 327 struct rtable *r = rt_cache_get_first(st);
313 328
314 if (r) 329 if (r)
315 while (pos && (r = rt_cache_get_next(st, r))) { 330 while (pos && (r = rt_cache_get_next(st, r)))
316 if (r->rt_genid != st->genid)
317 continue;
318 --pos; 331 --pos;
319 }
320 return pos ? NULL : r; 332 return pos ? NULL : r;
321} 333}
322 334
@@ -390,7 +402,7 @@ static const struct seq_operations rt_cache_seq_ops = {
390 402
391static int rt_cache_seq_open(struct inode *inode, struct file *file) 403static int rt_cache_seq_open(struct inode *inode, struct file *file)
392{ 404{
393 return seq_open_private(file, &rt_cache_seq_ops, 405 return seq_open_net(inode, file, &rt_cache_seq_ops,
394 sizeof(struct rt_cache_iter_state)); 406 sizeof(struct rt_cache_iter_state));
395} 407}
396 408
@@ -399,7 +411,7 @@ static const struct file_operations rt_cache_seq_fops = {
399 .open = rt_cache_seq_open, 411 .open = rt_cache_seq_open,
400 .read = seq_read, 412 .read = seq_read,
401 .llseek = seq_lseek, 413 .llseek = seq_lseek,
402 .release = seq_release_private, 414 .release = seq_release_net,
403}; 415};
404 416
405 417
@@ -533,7 +545,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
533} 545}
534#endif 546#endif
535 547
536static __init int ip_rt_proc_init(struct net *net) 548static int __net_init ip_rt_do_proc_init(struct net *net)
537{ 549{
538 struct proc_dir_entry *pde; 550 struct proc_dir_entry *pde;
539 551
@@ -564,8 +576,26 @@ err2:
564err1: 576err1:
565 return -ENOMEM; 577 return -ENOMEM;
566} 578}
579
580static void __net_exit ip_rt_do_proc_exit(struct net *net)
581{
582 remove_proc_entry("rt_cache", net->proc_net_stat);
583 remove_proc_entry("rt_cache", net->proc_net);
584 remove_proc_entry("rt_acct", net->proc_net);
585}
586
587static struct pernet_operations ip_rt_proc_ops __net_initdata = {
588 .init = ip_rt_do_proc_init,
589 .exit = ip_rt_do_proc_exit,
590};
591
592static int __init ip_rt_proc_init(void)
593{
594 return register_pernet_subsys(&ip_rt_proc_ops);
595}
596
567#else 597#else
568static inline int ip_rt_proc_init(struct net *net) 598static inline int ip_rt_proc_init(void)
569{ 599{
570 return 0; 600 return 0;
571} 601}
@@ -1131,10 +1161,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1131 __be32 skeys[2] = { saddr, 0 }; 1161 __be32 skeys[2] = { saddr, 0 };
1132 int ikeys[2] = { dev->ifindex, 0 }; 1162 int ikeys[2] = { dev->ifindex, 0 };
1133 struct netevent_redirect netevent; 1163 struct netevent_redirect netevent;
1164 struct net *net;
1134 1165
1135 if (!in_dev) 1166 if (!in_dev)
1136 return; 1167 return;
1137 1168
1169 net = dev->nd_net;
1138 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1170 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1139 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) 1171 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
1140 || ipv4_is_zeronet(new_gw)) 1172 || ipv4_is_zeronet(new_gw))
@@ -1146,7 +1178,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1146 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 1178 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1147 goto reject_redirect; 1179 goto reject_redirect;
1148 } else { 1180 } else {
1149 if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST) 1181 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
1150 goto reject_redirect; 1182 goto reject_redirect;
1151 } 1183 }
1152 1184
@@ -1164,7 +1196,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1164 rth->fl.fl4_src != skeys[i] || 1196 rth->fl.fl4_src != skeys[i] ||
1165 rth->fl.oif != ikeys[k] || 1197 rth->fl.oif != ikeys[k] ||
1166 rth->fl.iif != 0 || 1198 rth->fl.iif != 0 ||
1167 rth->rt_genid != atomic_read(&rt_genid)) { 1199 rth->rt_genid != atomic_read(&rt_genid) ||
1200 rth->u.dst.dev->nd_net != net) {
1168 rthp = &rth->u.dst.rt_next; 1201 rthp = &rth->u.dst.rt_next;
1169 continue; 1202 continue;
1170 } 1203 }
@@ -2668,9 +2701,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2668 int err; 2701 int err;
2669 struct sk_buff *skb; 2702 struct sk_buff *skb;
2670 2703
2671 if (net != &init_net)
2672 return -EINVAL;
2673
2674 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2704 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2675 if (err < 0) 2705 if (err < 0)
2676 goto errout; 2706 goto errout;
@@ -2700,7 +2730,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2700 if (iif) { 2730 if (iif) {
2701 struct net_device *dev; 2731 struct net_device *dev;
2702 2732
2703 dev = __dev_get_by_index(&init_net, iif); 2733 dev = __dev_get_by_index(net, iif);
2704 if (dev == NULL) { 2734 if (dev == NULL) {
2705 err = -ENODEV; 2735 err = -ENODEV;
2706 goto errout_free; 2736 goto errout_free;
@@ -2726,7 +2756,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2726 }, 2756 },
2727 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2757 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2728 }; 2758 };
2729 err = ip_route_output_key(&init_net, &rt, &fl); 2759 err = ip_route_output_key(net, &rt, &fl);
2730 } 2760 }
2731 2761
2732 if (err) 2762 if (err)
@@ -2737,11 +2767,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2737 rt->rt_flags |= RTCF_NOTIFY; 2767 rt->rt_flags |= RTCF_NOTIFY;
2738 2768
2739 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2769 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2740 RTM_NEWROUTE, 0, 0); 2770 RTM_NEWROUTE, 0, 0);
2741 if (err <= 0) 2771 if (err <= 0)
2742 goto errout_free; 2772 goto errout_free;
2743 2773
2744 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 2774 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2745errout: 2775errout:
2746 return err; 2776 return err;
2747 2777
@@ -2755,6 +2785,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2755 struct rtable *rt; 2785 struct rtable *rt;
2756 int h, s_h; 2786 int h, s_h;
2757 int idx, s_idx; 2787 int idx, s_idx;
2788 struct net *net;
2789
2790 net = skb->sk->sk_net;
2758 2791
2759 s_h = cb->args[0]; 2792 s_h = cb->args[0];
2760 if (s_h < 0) 2793 if (s_h < 0)
@@ -2764,7 +2797,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2764 rcu_read_lock_bh(); 2797 rcu_read_lock_bh();
2765 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 2798 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2766 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2799 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2767 if (idx < s_idx) 2800 if (rt->u.dst.dev->nd_net != net || idx < s_idx)
2768 continue; 2801 continue;
2769 if (rt->rt_genid != atomic_read(&rt_genid)) 2802 if (rt->rt_genid != atomic_read(&rt_genid))
2770 continue; 2803 continue;
@@ -3040,7 +3073,7 @@ int __init ip_rt_init(void)
3040 ip_rt_secret_interval; 3073 ip_rt_secret_interval;
3041 add_timer(&rt_secret_timer); 3074 add_timer(&rt_secret_timer);
3042 3075
3043 if (ip_rt_proc_init(&init_net)) 3076 if (ip_rt_proc_init())
3044 printk(KERN_ERR "Unable to create route proc files\n"); 3077 printk(KERN_ERR "Unable to create route proc files\n");
3045#ifdef CONFIG_XFRM 3078#ifdef CONFIG_XFRM
3046 xfrm_init(); 3079 xfrm_init();
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f470fe4511db..4704f27f6c0b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -10,8 +10,6 @@
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ 12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
13 *
14 * Missing: IPv6 support.
15 */ 13 */
16 14
17#include <linux/tcp.h> 15#include <linux/tcp.h>
@@ -23,22 +21,25 @@
23 21
24extern int sysctl_tcp_syncookies; 22extern int sysctl_tcp_syncookies;
25 23
26static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; 24__u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
25EXPORT_SYMBOL(syncookie_secret);
27 26
28static __init int init_syncookies(void) 27static __init int init_syncookies(void)
29{ 28{
30 get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); 29 get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
31 return 0; 30 return 0;
32} 31}
33module_init(init_syncookies); 32__initcall(init_syncookies);
34 33
35#define COOKIEBITS 24 /* Upper bits store count */ 34#define COOKIEBITS 24 /* Upper bits store count */
36#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) 35#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
37 36
37static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
38
38static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, 39static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
39 u32 count, int c) 40 u32 count, int c)
40{ 41{
41 __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS]; 42 __u32 *tmp = __get_cpu_var(cookie_scratch);
42 43
43 memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); 44 memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c]));
44 tmp[0] = (__force u32)saddr; 45 tmp[0] = (__force u32)saddr;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 3aa0b23c1ea0..eb5b9854c8c7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,12 +1,13 @@
1/* 1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.1 2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.2
3 * 3 * Home page:
4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
4 * This is from the implementation of CUBIC TCP in 5 * This is from the implementation of CUBIC TCP in
5 * Injong Rhee, Lisong Xu. 6 * Injong Rhee, Lisong Xu.
6 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant 7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant
7 * in PFLDnet 2005 8 * in PFLDnet 2005
8 * Available from: 9 * Available from:
9 * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf 10 * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf
10 * 11 *
11 * Unless CUBIC is enabled and congestion window is large 12 * Unless CUBIC is enabled and congestion window is large
12 * this behaves the same as the original Reno. 13 * this behaves the same as the original Reno.
@@ -20,15 +21,10 @@
20#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation 21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
21 * max_cwnd = snd_cwnd * beta 22 * max_cwnd = snd_cwnd * beta
22 */ 23 */
23#define BICTCP_B 4 /*
24 * In binary search,
25 * go to point (max+min)/N
26 */
27#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ 24#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
28 25
29static int fast_convergence __read_mostly = 1; 26static int fast_convergence __read_mostly = 1;
30static int max_increment __read_mostly = 16; 27static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */
31static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
32static int initial_ssthresh __read_mostly; 28static int initial_ssthresh __read_mostly;
33static int bic_scale __read_mostly = 41; 29static int bic_scale __read_mostly = 41;
34static int tcp_friendliness __read_mostly = 1; 30static int tcp_friendliness __read_mostly = 1;
@@ -40,9 +36,7 @@ static u64 cube_factor __read_mostly;
40/* Note parameters that are used for precomputing scale factors are read-only */ 36/* Note parameters that are used for precomputing scale factors are read-only */
41module_param(fast_convergence, int, 0644); 37module_param(fast_convergence, int, 0644);
42MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); 38MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
43module_param(max_increment, int, 0644); 39module_param(beta, int, 0644);
44MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
45module_param(beta, int, 0444);
46MODULE_PARM_DESC(beta, "beta for multiplicative increase"); 40MODULE_PARM_DESC(beta, "beta for multiplicative increase");
47module_param(initial_ssthresh, int, 0644); 41module_param(initial_ssthresh, int, 0644);
48MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); 42MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
@@ -145,7 +139,7 @@ static u32 cubic_root(u64 a)
145static inline void bictcp_update(struct bictcp *ca, u32 cwnd) 139static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
146{ 140{
147 u64 offs; 141 u64 offs;
148 u32 delta, t, bic_target, min_cnt, max_cnt; 142 u32 delta, t, bic_target, max_cnt;
149 143
150 ca->ack_cnt++; /* count the number of ACKs */ 144 ca->ack_cnt++; /* count the number of ACKs */
151 145
@@ -211,19 +205,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
211 ca->cnt = 100 * cwnd; /* very small increment*/ 205 ca->cnt = 100 * cwnd; /* very small increment*/
212 } 206 }
213 207
214 if (ca->delay_min > 0) {
215 /* max increment = Smax * rtt / 0.1 */
216 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
217
218 /* use concave growth when the target is above the origin */
219 if (ca->cnt < min_cnt && t >= ca->bic_K)
220 ca->cnt = min_cnt;
221 }
222
223 /* slow start and low utilization */
224 if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
225 ca->cnt = 50;
226
227 /* TCP Friendly */ 208 /* TCP Friendly */
228 if (tcp_friendliness) { 209 if (tcp_friendliness) {
229 u32 scale = beta_scale; 210 u32 scale = beta_scale;
@@ -391,4 +372,4 @@ module_exit(cubictcp_unregister);
391MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); 372MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
392MODULE_LICENSE("GPL"); 373MODULE_LICENSE("GPL");
393MODULE_DESCRIPTION("CUBIC TCP"); 374MODULE_DESCRIPTION("CUBIC TCP");
394MODULE_VERSION("2.1"); 375MODULE_VERSION("2.2");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7facdb0f6960..c4679f343675 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5330,6 +5330,7 @@ discard:
5330 5330
5331EXPORT_SYMBOL(sysctl_tcp_ecn); 5331EXPORT_SYMBOL(sysctl_tcp_ecn);
5332EXPORT_SYMBOL(sysctl_tcp_reordering); 5332EXPORT_SYMBOL(sysctl_tcp_reordering);
5333EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5333EXPORT_SYMBOL(tcp_parse_options); 5334EXPORT_SYMBOL(tcp_parse_options);
5334EXPORT_SYMBOL(tcp_rcv_established); 5335EXPORT_SYMBOL(tcp_rcv_established);
5335EXPORT_SYMBOL(tcp_rcv_state_process); 5336EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 00156bf421ca..3873c4dbeaeb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -723,8 +723,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
723 * This still operates on a request_sock only, not on a big 723 * This still operates on a request_sock only, not on a big
724 * socket. 724 * socket.
725 */ 725 */
726static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, 726static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
727 struct dst_entry *dst) 727 struct dst_entry *dst)
728{ 728{
729 const struct inet_request_sock *ireq = inet_rsk(req); 729 const struct inet_request_sock *ireq = inet_rsk(req);
730 int err = -1; 730 int err = -1;
@@ -732,7 +732,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
732 732
733 /* First, grab a route. */ 733 /* First, grab a route. */
734 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 734 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
735 goto out; 735 return -1;
736 736
737 skb = tcp_make_synack(sk, dst, req); 737 skb = tcp_make_synack(sk, dst, req);
738 738
@@ -751,11 +751,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
751 err = net_xmit_eval(err); 751 err = net_xmit_eval(err);
752 } 752 }
753 753
754out:
755 dst_release(dst); 754 dst_release(dst);
756 return err; 755 return err;
757} 756}
758 757
758static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
759{
760 return __tcp_v4_send_synack(sk, req, NULL);
761}
762
759/* 763/*
760 * IPv4 request_sock destructor. 764 * IPv4 request_sock destructor.
761 */ 765 */
@@ -1351,8 +1355,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1351 (s32)(peer->tcp_ts - req->ts_recent) > 1355 (s32)(peer->tcp_ts - req->ts_recent) >
1352 TCP_PAWS_WINDOW) { 1356 TCP_PAWS_WINDOW) {
1353 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 1357 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1354 dst_release(dst); 1358 goto drop_and_release;
1355 goto drop_and_free;
1356 } 1359 }
1357 } 1360 }
1358 /* Kill the following clause, if you dislike this way. */ 1361 /* Kill the following clause, if you dislike this way. */
@@ -1372,24 +1375,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1372 "request from %u.%u.%u.%u/%u\n", 1375 "request from %u.%u.%u.%u/%u\n",
1373 NIPQUAD(saddr), 1376 NIPQUAD(saddr),
1374 ntohs(tcp_hdr(skb)->source)); 1377 ntohs(tcp_hdr(skb)->source));
1375 dst_release(dst); 1378 goto drop_and_release;
1376 goto drop_and_free;
1377 } 1379 }
1378 1380
1379 isn = tcp_v4_init_sequence(skb); 1381 isn = tcp_v4_init_sequence(skb);
1380 } 1382 }
1381 tcp_rsk(req)->snt_isn = isn; 1383 tcp_rsk(req)->snt_isn = isn;
1382 1384
1383 if (tcp_v4_send_synack(sk, req, dst)) 1385 if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1384 goto drop_and_free; 1386 goto drop_and_free;
1385 1387
1386 if (want_cookie) { 1388 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1387 reqsk_free(req);
1388 } else {
1389 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1390 }
1391 return 0; 1389 return 0;
1392 1390
1391drop_and_release:
1392 dst_release(dst);
1393drop_and_free: 1393drop_and_free:
1394 reqsk_free(req); 1394 reqsk_free(req);
1395drop: 1395drop:
@@ -2443,7 +2443,7 @@ struct proto tcp_prot = {
2443 REF_PROTO_INUSE(tcp) 2443 REF_PROTO_INUSE(tcp)
2444}; 2444};
2445 2445
2446void __init tcp_v4_init(struct net_proto_family *ops) 2446void __init tcp_v4_init(void)
2447{ 2447{
2448 if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, 2448 if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
2449 IPPROTO_TCP) < 0) 2449 IPPROTO_TCP) < 0)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b61b76847ad9..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,6 +35,8 @@
35#endif 35#endif
36 36
37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; 37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
38EXPORT_SYMBOL(sysctl_tcp_syncookies);
39
38int sysctl_tcp_abort_on_overflow __read_mostly; 40int sysctl_tcp_abort_on_overflow __read_mostly;
39 41
40struct inet_timewait_death_row tcp_death_row = { 42struct inet_timewait_death_row tcp_death_row = {
@@ -536,7 +538,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
536 * Enforce "SYN-ACK" according to figure 8, figure 6 538 * Enforce "SYN-ACK" according to figure 8, figure 6
537 * of RFC793, fixed by RFC1122. 539 * of RFC793, fixed by RFC1122.
538 */ 540 */
539 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 541 req->rsk_ops->rtx_syn_ack(sk, req);
540 return NULL; 542 return NULL;
541 } 543 }
542 544
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed750f9ceb07..cbfef8b1f5e8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2560,6 +2560,7 @@ void tcp_send_probe0(struct sock *sk)
2560 } 2560 }
2561} 2561}
2562 2562
2563EXPORT_SYMBOL(tcp_select_initial_window);
2563EXPORT_SYMBOL(tcp_connect); 2564EXPORT_SYMBOL(tcp_connect);
2564EXPORT_SYMBOL(tcp_make_synack); 2565EXPORT_SYMBOL(tcp_make_synack);
2565EXPORT_SYMBOL(tcp_simple_retransmit); 2566EXPORT_SYMBOL(tcp_simple_retransmit);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7ea1b67b6de1..c53d7673b57d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -246,553 +246,6 @@ int udp_get_port(struct sock *sk, unsigned short snum,
246 return __udp_lib_get_port(sk, snum, udp_hash, scmp); 246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
247} 247}
248 248
249int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
250{
251 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
252
253 return ( !ipv6_only_sock(sk2) &&
254 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
255 inet1->rcv_saddr == inet2->rcv_saddr ));
256}
257
258static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
259{
260 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
261}
262
263/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
264 * harder than this. -DaveM
265 */
266static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
267 __be16 sport, __be32 daddr, __be16 dport,
268 int dif, struct hlist_head udptable[])
269{
270 struct sock *sk, *result = NULL;
271 struct hlist_node *node;
272 unsigned short hnum = ntohs(dport);
273 int badness = -1;
274
275 read_lock(&udp_hash_lock);
276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
277 struct inet_sock *inet = inet_sk(sk);
278
279 if (sk->sk_net == net && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) {
281 int score = (sk->sk_family == PF_INET ? 1 : 0);
282 if (inet->rcv_saddr) {
283 if (inet->rcv_saddr != daddr)
284 continue;
285 score+=2;
286 }
287 if (inet->daddr) {
288 if (inet->daddr != saddr)
289 continue;
290 score+=2;
291 }
292 if (inet->dport) {
293 if (inet->dport != sport)
294 continue;
295 score+=2;
296 }
297 if (sk->sk_bound_dev_if) {
298 if (sk->sk_bound_dev_if != dif)
299 continue;
300 score+=2;
301 }
302 if (score == 9) {
303 result = sk;
304 break;
305 } else if (score > badness) {
306 result = sk;
307 badness = score;
308 }
309 }
310 }
311 if (result)
312 sock_hold(result);
313 read_unlock(&udp_hash_lock);
314 return result;
315}
316
317static inline struct sock *udp_v4_mcast_next(struct sock *sk,
318 __be16 loc_port, __be32 loc_addr,
319 __be16 rmt_port, __be32 rmt_addr,
320 int dif)
321{
322 struct hlist_node *node;
323 struct sock *s = sk;
324 unsigned short hnum = ntohs(loc_port);
325
326 sk_for_each_from(s, node) {
327 struct inet_sock *inet = inet_sk(s);
328
329 if (s->sk_hash != hnum ||
330 (inet->daddr && inet->daddr != rmt_addr) ||
331 (inet->dport != rmt_port && inet->dport) ||
332 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
333 ipv6_only_sock(s) ||
334 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
335 continue;
336 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
337 continue;
338 goto found;
339 }
340 s = NULL;
341found:
342 return s;
343}
344
345/*
346 * This routine is called by the ICMP module when it gets some
347 * sort of error condition. If err < 0 then the socket should
348 * be closed and the error returned to the user. If err > 0
349 * it's just the icmp type << 8 | icmp code.
350 * Header points to the ip header of the error packet. We move
351 * on past this. Then (as it used to claim before adjustment)
352 * header points to the first 8 bytes of the udp header. We need
353 * to find the appropriate port.
354 */
355
356void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
357{
358 struct inet_sock *inet;
359 struct iphdr *iph = (struct iphdr*)skb->data;
360 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
361 const int type = icmp_hdr(skb)->type;
362 const int code = icmp_hdr(skb)->code;
363 struct sock *sk;
364 int harderr;
365 int err;
366
367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
368 iph->saddr, uh->source, skb->dev->ifindex, udptable);
369 if (sk == NULL) {
370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
371 return; /* No socket for error */
372 }
373
374 err = 0;
375 harderr = 0;
376 inet = inet_sk(sk);
377
378 switch (type) {
379 default:
380 case ICMP_TIME_EXCEEDED:
381 err = EHOSTUNREACH;
382 break;
383 case ICMP_SOURCE_QUENCH:
384 goto out;
385 case ICMP_PARAMETERPROB:
386 err = EPROTO;
387 harderr = 1;
388 break;
389 case ICMP_DEST_UNREACH:
390 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
391 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
392 err = EMSGSIZE;
393 harderr = 1;
394 break;
395 }
396 goto out;
397 }
398 err = EHOSTUNREACH;
399 if (code <= NR_ICMP_UNREACH) {
400 harderr = icmp_err_convert[code].fatal;
401 err = icmp_err_convert[code].errno;
402 }
403 break;
404 }
405
406 /*
407 * RFC1122: OK. Passes ICMP errors back to application, as per
408 * 4.1.3.3.
409 */
410 if (!inet->recverr) {
411 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
412 goto out;
413 } else {
414 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
415 }
416 sk->sk_err = err;
417 sk->sk_error_report(sk);
418out:
419 sock_put(sk);
420}
421
422void udp_err(struct sk_buff *skb, u32 info)
423{
424 __udp4_lib_err(skb, info, udp_hash);
425}
426
427/*
428 * Throw away all pending data and cancel the corking. Socket is locked.
429 */
430static void udp_flush_pending_frames(struct sock *sk)
431{
432 struct udp_sock *up = udp_sk(sk);
433
434 if (up->pending) {
435 up->len = 0;
436 up->pending = 0;
437 ip_flush_pending_frames(sk);
438 }
439}
440
441/**
442 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
443 * @sk: socket we are sending on
444 * @skb: sk_buff containing the filled-in UDP header
445 * (checksum field must be zeroed out)
446 */
447static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
448 __be32 src, __be32 dst, int len )
449{
450 unsigned int offset;
451 struct udphdr *uh = udp_hdr(skb);
452 __wsum csum = 0;
453
454 if (skb_queue_len(&sk->sk_write_queue) == 1) {
455 /*
456 * Only one fragment on the socket.
457 */
458 skb->csum_start = skb_transport_header(skb) - skb->head;
459 skb->csum_offset = offsetof(struct udphdr, check);
460 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
461 } else {
462 /*
463 * HW-checksum won't work as there are two or more
464 * fragments on the socket so that all csums of sk_buffs
465 * should be together
466 */
467 offset = skb_transport_offset(skb);
468 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
469
470 skb->ip_summed = CHECKSUM_NONE;
471
472 skb_queue_walk(&sk->sk_write_queue, skb) {
473 csum = csum_add(csum, skb->csum);
474 }
475
476 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
477 if (uh->check == 0)
478 uh->check = CSUM_MANGLED_0;
479 }
480}
481
482/*
483 * Push out all pending data as one UDP datagram. Socket is locked.
484 */
485static int udp_push_pending_frames(struct sock *sk)
486{
487 struct udp_sock *up = udp_sk(sk);
488 struct inet_sock *inet = inet_sk(sk);
489 struct flowi *fl = &inet->cork.fl;
490 struct sk_buff *skb;
491 struct udphdr *uh;
492 int err = 0;
493 int is_udplite = IS_UDPLITE(sk);
494 __wsum csum = 0;
495
496 /* Grab the skbuff where UDP header space exists. */
497 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
498 goto out;
499
500 /*
501 * Create a UDP header
502 */
503 uh = udp_hdr(skb);
504 uh->source = fl->fl_ip_sport;
505 uh->dest = fl->fl_ip_dport;
506 uh->len = htons(up->len);
507 uh->check = 0;
508
509 if (is_udplite) /* UDP-Lite */
510 csum = udplite_csum_outgoing(sk, skb);
511
512 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
513
514 skb->ip_summed = CHECKSUM_NONE;
515 goto send;
516
517 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
518
519 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
520 goto send;
521
522 } else /* `normal' UDP */
523 csum = udp_csum_outgoing(sk, skb);
524
525 /* add protocol-dependent pseudo-header */
526 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
527 sk->sk_protocol, csum );
528 if (uh->check == 0)
529 uh->check = CSUM_MANGLED_0;
530
531send:
532 err = ip_push_pending_frames(sk);
533out:
534 up->len = 0;
535 up->pending = 0;
536 if (!err)
537 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
538 return err;
539}
540
541int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
542 size_t len)
543{
544 struct inet_sock *inet = inet_sk(sk);
545 struct udp_sock *up = udp_sk(sk);
546 int ulen = len;
547 struct ipcm_cookie ipc;
548 struct rtable *rt = NULL;
549 int free = 0;
550 int connected = 0;
551 __be32 daddr, faddr, saddr;
552 __be16 dport;
553 u8 tos;
554 int err, is_udplite = IS_UDPLITE(sk);
555 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
556 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
557
558 if (len > 0xFFFF)
559 return -EMSGSIZE;
560
561 /*
562 * Check the flags.
563 */
564
565 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
566 return -EOPNOTSUPP;
567
568 ipc.opt = NULL;
569
570 if (up->pending) {
571 /*
572 * There are pending frames.
573 * The socket lock must be held while it's corked.
574 */
575 lock_sock(sk);
576 if (likely(up->pending)) {
577 if (unlikely(up->pending != AF_INET)) {
578 release_sock(sk);
579 return -EINVAL;
580 }
581 goto do_append_data;
582 }
583 release_sock(sk);
584 }
585 ulen += sizeof(struct udphdr);
586
587 /*
588 * Get and verify the address.
589 */
590 if (msg->msg_name) {
591 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
592 if (msg->msg_namelen < sizeof(*usin))
593 return -EINVAL;
594 if (usin->sin_family != AF_INET) {
595 if (usin->sin_family != AF_UNSPEC)
596 return -EAFNOSUPPORT;
597 }
598
599 daddr = usin->sin_addr.s_addr;
600 dport = usin->sin_port;
601 if (dport == 0)
602 return -EINVAL;
603 } else {
604 if (sk->sk_state != TCP_ESTABLISHED)
605 return -EDESTADDRREQ;
606 daddr = inet->daddr;
607 dport = inet->dport;
608 /* Open fast path for connected socket.
609 Route will not be used, if at least one option is set.
610 */
611 connected = 1;
612 }
613 ipc.addr = inet->saddr;
614
615 ipc.oif = sk->sk_bound_dev_if;
616 if (msg->msg_controllen) {
617 err = ip_cmsg_send(msg, &ipc);
618 if (err)
619 return err;
620 if (ipc.opt)
621 free = 1;
622 connected = 0;
623 }
624 if (!ipc.opt)
625 ipc.opt = inet->opt;
626
627 saddr = ipc.addr;
628 ipc.addr = faddr = daddr;
629
630 if (ipc.opt && ipc.opt->srr) {
631 if (!daddr)
632 return -EINVAL;
633 faddr = ipc.opt->faddr;
634 connected = 0;
635 }
636 tos = RT_TOS(inet->tos);
637 if (sock_flag(sk, SOCK_LOCALROUTE) ||
638 (msg->msg_flags & MSG_DONTROUTE) ||
639 (ipc.opt && ipc.opt->is_strictroute)) {
640 tos |= RTO_ONLINK;
641 connected = 0;
642 }
643
644 if (ipv4_is_multicast(daddr)) {
645 if (!ipc.oif)
646 ipc.oif = inet->mc_index;
647 if (!saddr)
648 saddr = inet->mc_addr;
649 connected = 0;
650 }
651
652 if (connected)
653 rt = (struct rtable*)sk_dst_check(sk, 0);
654
655 if (rt == NULL) {
656 struct flowi fl = { .oif = ipc.oif,
657 .nl_u = { .ip4_u =
658 { .daddr = faddr,
659 .saddr = saddr,
660 .tos = tos } },
661 .proto = sk->sk_protocol,
662 .uli_u = { .ports =
663 { .sport = inet->sport,
664 .dport = dport } } };
665 security_sk_classify_flow(sk, &fl);
666 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
667 if (err) {
668 if (err == -ENETUNREACH)
669 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
670 goto out;
671 }
672
673 err = -EACCES;
674 if ((rt->rt_flags & RTCF_BROADCAST) &&
675 !sock_flag(sk, SOCK_BROADCAST))
676 goto out;
677 if (connected)
678 sk_dst_set(sk, dst_clone(&rt->u.dst));
679 }
680
681 if (msg->msg_flags&MSG_CONFIRM)
682 goto do_confirm;
683back_from_confirm:
684
685 saddr = rt->rt_src;
686 if (!ipc.addr)
687 daddr = ipc.addr = rt->rt_dst;
688
689 lock_sock(sk);
690 if (unlikely(up->pending)) {
691 /* The socket is already corked while preparing it. */
692 /* ... which is an evident application bug. --ANK */
693 release_sock(sk);
694
695 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
696 err = -EINVAL;
697 goto out;
698 }
699 /*
700 * Now cork the socket to pend data.
701 */
702 inet->cork.fl.fl4_dst = daddr;
703 inet->cork.fl.fl_ip_dport = dport;
704 inet->cork.fl.fl4_src = saddr;
705 inet->cork.fl.fl_ip_sport = inet->sport;
706 up->pending = AF_INET;
707
708do_append_data:
709 up->len += ulen;
710 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
711 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
712 sizeof(struct udphdr), &ipc, rt,
713 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
714 if (err)
715 udp_flush_pending_frames(sk);
716 else if (!corkreq)
717 err = udp_push_pending_frames(sk);
718 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
719 up->pending = 0;
720 release_sock(sk);
721
722out:
723 ip_rt_put(rt);
724 if (free)
725 kfree(ipc.opt);
726 if (!err)
727 return len;
728 /*
729 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
730 * ENOBUFS might not be good (it's not tunable per se), but otherwise
731 * we don't have a good statistic (IpOutDiscards but it can be too many
732 * things). We could add another new stat but at least for now that
733 * seems like overkill.
734 */
735 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
736 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
737 }
738 return err;
739
740do_confirm:
741 dst_confirm(&rt->u.dst);
742 if (!(msg->msg_flags&MSG_PROBE) || len)
743 goto back_from_confirm;
744 err = 0;
745 goto out;
746}
747
748int udp_sendpage(struct sock *sk, struct page *page, int offset,
749 size_t size, int flags)
750{
751 struct udp_sock *up = udp_sk(sk);
752 int ret;
753
754 if (!up->pending) {
755 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
756
757 /* Call udp_sendmsg to specify destination address which
758 * sendpage interface can't pass.
759 * This will succeed only when the socket is connected.
760 */
761 ret = udp_sendmsg(NULL, sk, &msg, 0);
762 if (ret < 0)
763 return ret;
764 }
765
766 lock_sock(sk);
767
768 if (unlikely(!up->pending)) {
769 release_sock(sk);
770
771 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
772 return -EINVAL;
773 }
774
775 ret = ip_append_page(sk, page, offset, size, flags);
776 if (ret == -EOPNOTSUPP) {
777 release_sock(sk);
778 return sock_no_sendpage(sk->sk_socket, page, offset,
779 size, flags);
780 }
781 if (ret < 0) {
782 udp_flush_pending_frames(sk);
783 goto out;
784 }
785
786 up->len += size;
787 if (!(up->corkflag || (flags&MSG_MORE)))
788 ret = udp_push_pending_frames(sk);
789 if (!ret)
790 ret = size;
791out:
792 release_sock(sk);
793 return ret;
794}
795
796/* 249/*
797 * IOCTL requests applicable to the UDP protocol 250 * IOCTL requests applicable to the UDP protocol
798 */ 251 */
@@ -833,107 +286,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
833 return 0; 286 return 0;
834} 287}
835 288
836/*
837 * This should be easy, if there is something there we
838 * return it, otherwise we block.
839 */
840
841int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
842 size_t len, int noblock, int flags, int *addr_len)
843{
844 struct inet_sock *inet = inet_sk(sk);
845 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
846 struct sk_buff *skb;
847 unsigned int ulen, copied;
848 int peeked;
849 int err;
850 int is_udplite = IS_UDPLITE(sk);
851
852 /*
853 * Check any passed addresses
854 */
855 if (addr_len)
856 *addr_len=sizeof(*sin);
857
858 if (flags & MSG_ERRQUEUE)
859 return ip_recv_error(sk, msg, len);
860
861try_again:
862 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
863 &peeked, &err);
864 if (!skb)
865 goto out;
866
867 ulen = skb->len - sizeof(struct udphdr);
868 copied = len;
869 if (copied > ulen)
870 copied = ulen;
871 else if (copied < ulen)
872 msg->msg_flags |= MSG_TRUNC;
873
874 /*
875 * If checksum is needed at all, try to do it while copying the
876 * data. If the data is truncated, or if we only want a partial
877 * coverage checksum (UDP-Lite), do it before the copy.
878 */
879
880 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
881 if (udp_lib_checksum_complete(skb))
882 goto csum_copy_err;
883 }
884
885 if (skb_csum_unnecessary(skb))
886 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
887 msg->msg_iov, copied );
888 else {
889 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
890
891 if (err == -EINVAL)
892 goto csum_copy_err;
893 }
894
895 if (err)
896 goto out_free;
897
898 if (!peeked)
899 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
900
901 sock_recv_timestamp(msg, sk, skb);
902
903 /* Copy the address. */
904 if (sin)
905 {
906 sin->sin_family = AF_INET;
907 sin->sin_port = udp_hdr(skb)->source;
908 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
909 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
910 }
911 if (inet->cmsg_flags)
912 ip_cmsg_recv(msg, skb);
913
914 err = copied;
915 if (flags & MSG_TRUNC)
916 err = ulen;
917
918out_free:
919 lock_sock(sk);
920 skb_free_datagram(sk, skb);
921 release_sock(sk);
922out:
923 return err;
924
925csum_copy_err:
926 lock_sock(sk);
927 if (!skb_kill_datagram(sk, skb, flags))
928 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
929 release_sock(sk);
930
931 if (noblock)
932 return -EAGAIN;
933 goto try_again;
934}
935
936
937int udp_disconnect(struct sock *sk, int flags) 289int udp_disconnect(struct sock *sk, int flags)
938{ 290{
939 struct inet_sock *inet = inet_sk(sk); 291 struct inet_sock *inet = inet_sk(sk);
@@ -956,319 +308,6 @@ int udp_disconnect(struct sock *sk, int flags)
956 return 0; 308 return 0;
957} 309}
958 310
959/* returns:
960 * -1: error
961 * 0: success
962 * >0: "udp encap" protocol resubmission
963 *
964 * Note that in the success and error cases, the skb is assumed to
965 * have either been requeued or freed.
966 */
967int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
968{
969 struct udp_sock *up = udp_sk(sk);
970 int rc;
971 int is_udplite = IS_UDPLITE(sk);
972
973 /*
974 * Charge it to the socket, dropping if the queue is full.
975 */
976 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
977 goto drop;
978 nf_reset(skb);
979
980 if (up->encap_type) {
981 /*
982 * This is an encapsulation socket so pass the skb to
983 * the socket's udp_encap_rcv() hook. Otherwise, just
984 * fall through and pass this up the UDP socket.
985 * up->encap_rcv() returns the following value:
986 * =0 if skb was successfully passed to the encap
987 * handler or was discarded by it.
988 * >0 if skb should be passed on to UDP.
989 * <0 if skb should be resubmitted as proto -N
990 */
991
992 /* if we're overly short, let UDP handle it */
993 if (skb->len > sizeof(struct udphdr) &&
994 up->encap_rcv != NULL) {
995 int ret;
996
997 ret = (*up->encap_rcv)(sk, skb);
998 if (ret <= 0) {
999 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
1000 is_udplite);
1001 return -ret;
1002 }
1003 }
1004
1005 /* FALLTHROUGH -- it's a UDP Packet */
1006 }
1007
1008 /*
1009 * UDP-Lite specific tests, ignored on UDP sockets
1010 */
1011 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
1012
1013 /*
1014 * MIB statistics other than incrementing the error count are
1015 * disabled for the following two types of errors: these depend
1016 * on the application settings, not on the functioning of the
1017 * protocol stack as such.
1018 *
1019 * RFC 3828 here recommends (sec 3.3): "There should also be a
1020 * way ... to ... at least let the receiving application block
1021 * delivery of packets with coverage values less than a value
1022 * provided by the application."
1023 */
1024 if (up->pcrlen == 0) { /* full coverage was set */
1025 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1026 "%d while full coverage %d requested\n",
1027 UDP_SKB_CB(skb)->cscov, skb->len);
1028 goto drop;
1029 }
1030 /* The next case involves violating the min. coverage requested
1031 * by the receiver. This is subtle: if receiver wants x and x is
1032 * greater than the buffersize/MTU then receiver will complain
1033 * that it wants x while sender emits packets of smaller size y.
1034 * Therefore the above ...()->partial_cov statement is essential.
1035 */
1036 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
1037 LIMIT_NETDEBUG(KERN_WARNING
1038 "UDPLITE: coverage %d too small, need min %d\n",
1039 UDP_SKB_CB(skb)->cscov, up->pcrlen);
1040 goto drop;
1041 }
1042 }
1043
1044 if (sk->sk_filter) {
1045 if (udp_lib_checksum_complete(skb))
1046 goto drop;
1047 }
1048
1049 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1050 /* Note that an ENOMEM error is charged twice */
1051 if (rc == -ENOMEM)
1052 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
1053 goto drop;
1054 }
1055
1056 return 0;
1057
1058drop:
1059 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1060 kfree_skb(skb);
1061 return -1;
1062}
1063
1064/*
1065 * Multicasts and broadcasts go to each listener.
1066 *
1067 * Note: called only from the BH handler context,
1068 * so we don't need to lock the hashes.
1069 */
1070static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1071 struct udphdr *uh,
1072 __be32 saddr, __be32 daddr,
1073 struct hlist_head udptable[])
1074{
1075 struct sock *sk;
1076 int dif;
1077
1078 read_lock(&udp_hash_lock);
1079 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
1080 dif = skb->dev->ifindex;
1081 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1082 if (sk) {
1083 struct sock *sknext = NULL;
1084
1085 do {
1086 struct sk_buff *skb1 = skb;
1087
1088 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
1089 uh->source, saddr, dif);
1090 if (sknext)
1091 skb1 = skb_clone(skb, GFP_ATOMIC);
1092
1093 if (skb1) {
1094 int ret = 0;
1095
1096 bh_lock_sock_nested(sk);
1097 if (!sock_owned_by_user(sk))
1098 ret = udp_queue_rcv_skb(sk, skb1);
1099 else
1100 sk_add_backlog(sk, skb1);
1101 bh_unlock_sock(sk);
1102
1103 if (ret > 0)
1104 /* we should probably re-process instead
1105 * of dropping packets here. */
1106 kfree_skb(skb1);
1107 }
1108 sk = sknext;
1109 } while (sknext);
1110 } else
1111 kfree_skb(skb);
1112 read_unlock(&udp_hash_lock);
1113 return 0;
1114}
1115
1116/* Initialize UDP checksum. If exited with zero value (success),
1117 * CHECKSUM_UNNECESSARY means, that no more checks are required.
1118 * Otherwise, csum completion requires checksumming packet body,
1119 * including udp header and folding it to skb->csum.
1120 */
1121static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1122 int proto)
1123{
1124 const struct iphdr *iph;
1125 int err;
1126
1127 UDP_SKB_CB(skb)->partial_cov = 0;
1128 UDP_SKB_CB(skb)->cscov = skb->len;
1129
1130 if (proto == IPPROTO_UDPLITE) {
1131 err = udplite_checksum_init(skb, uh);
1132 if (err)
1133 return err;
1134 }
1135
1136 iph = ip_hdr(skb);
1137 if (uh->check == 0) {
1138 skb->ip_summed = CHECKSUM_UNNECESSARY;
1139 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1140 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1141 proto, skb->csum))
1142 skb->ip_summed = CHECKSUM_UNNECESSARY;
1143 }
1144 if (!skb_csum_unnecessary(skb))
1145 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1146 skb->len, proto, 0);
1147 /* Probably, we should checksum udp header (it should be in cache
1148 * in any case) and data in tiny packets (< rx copybreak).
1149 */
1150
1151 return 0;
1152}
1153
1154/*
1155 * All we need to do is get the socket, and then do a checksum.
1156 */
1157
1158int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1159 int proto)
1160{
1161 struct sock *sk;
1162 struct udphdr *uh = udp_hdr(skb);
1163 unsigned short ulen;
1164 struct rtable *rt = (struct rtable*)skb->dst;
1165 __be32 saddr = ip_hdr(skb)->saddr;
1166 __be32 daddr = ip_hdr(skb)->daddr;
1167
1168 /*
1169 * Validate the packet.
1170 */
1171 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1172 goto drop; /* No space for header. */
1173
1174 ulen = ntohs(uh->len);
1175 if (ulen > skb->len)
1176 goto short_packet;
1177
1178 if (proto == IPPROTO_UDP) {
1179 /* UDP validates ulen. */
1180 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1181 goto short_packet;
1182 uh = udp_hdr(skb);
1183 }
1184
1185 if (udp4_csum_init(skb, uh, proto))
1186 goto csum_error;
1187
1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1190
1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
1192 uh->dest, inet_iif(skb), udptable);
1193
1194 if (sk != NULL) {
1195 int ret = 0;
1196 bh_lock_sock_nested(sk);
1197 if (!sock_owned_by_user(sk))
1198 ret = udp_queue_rcv_skb(sk, skb);
1199 else
1200 sk_add_backlog(sk, skb);
1201 bh_unlock_sock(sk);
1202 sock_put(sk);
1203
1204 /* a return value > 0 means to resubmit the input, but
1205 * it wants the return to be -protocol, or 0
1206 */
1207 if (ret > 0)
1208 return -ret;
1209 return 0;
1210 }
1211
1212 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1213 goto drop;
1214 nf_reset(skb);
1215
1216 /* No socket. Drop packet silently, if checksum is wrong */
1217 if (udp_lib_checksum_complete(skb))
1218 goto csum_error;
1219
1220 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1221 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1222
1223 /*
1224 * Hmm. We got an UDP packet to a port to which we
1225 * don't wanna listen. Ignore it.
1226 */
1227 kfree_skb(skb);
1228 return 0;
1229
1230short_packet:
1231 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1232 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1233 NIPQUAD(saddr),
1234 ntohs(uh->source),
1235 ulen,
1236 skb->len,
1237 NIPQUAD(daddr),
1238 ntohs(uh->dest));
1239 goto drop;
1240
1241csum_error:
1242 /*
1243 * RFC1122: OK. Discards the bad packet silently (as far as
1244 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1245 */
1246 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1247 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1248 NIPQUAD(saddr),
1249 ntohs(uh->source),
1250 NIPQUAD(daddr),
1251 ntohs(uh->dest),
1252 ulen);
1253drop:
1254 UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1255 kfree_skb(skb);
1256 return 0;
1257}
1258
1259int udp_rcv(struct sk_buff *skb)
1260{
1261 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
1262}
1263
/*
 * Socket teardown hook: discard any data still corked on the socket.
 * Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);	/* needs the socket lock held */
	release_sock(sk);

	return 0;
}
1271
1272/* 311/*
1273 * Socket option code for UDP 312 * Socket option code for UDP
1274 */ 313 */
@@ -1279,7 +318,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1279 struct udp_sock *up = udp_sk(sk); 318 struct udp_sock *up = udp_sk(sk);
1280 int val; 319 int val;
1281 int err = 0; 320 int err = 0;
321#ifdef CONFIG_IP_UDPLITE
1282 int is_udplite = IS_UDPLITE(sk); 322 int is_udplite = IS_UDPLITE(sk);
323#endif
1283 324
1284 if (optlen<sizeof(int)) 325 if (optlen<sizeof(int))
1285 return -EINVAL; 326 return -EINVAL;
@@ -1315,6 +356,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1315 } 356 }
1316 break; 357 break;
1317 358
359#ifdef CONFIG_IP_UDPLITE
1318 /* 360 /*
1319 * UDP-Lite's partial checksum coverage (RFC 3828). 361 * UDP-Lite's partial checksum coverage (RFC 3828).
1320 */ 362 */
@@ -1340,6 +382,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1340 up->pcrlen = val; 382 up->pcrlen = val;
1341 up->pcflag |= UDPLITE_RECV_CC; 383 up->pcflag |= UDPLITE_RECV_CC;
1342 break; 384 break;
385#endif
1343 386
1344 default: 387 default:
1345 err = -ENOPROTOOPT; 388 err = -ENOPROTOOPT;
@@ -1349,26 +392,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1349 return err; 392 return err;
1350} 393}
1351 394
1352int udp_setsockopt(struct sock *sk, int level, int optname,
1353 char __user *optval, int optlen)
1354{
1355 if (level == SOL_UDP || level == SOL_UDPLITE)
1356 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1357 udp_push_pending_frames);
1358 return ip_setsockopt(sk, level, optname, optval, optlen);
1359}
1360
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): only the fallback for non-UDP levels
 * differs (compat_ip_setsockopt() instead of ip_setsockopt()).
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1371
1372int udp_lib_getsockopt(struct sock *sk, int level, int optname, 395int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1373 char __user *optval, int __user *optlen) 396 char __user *optval, int __user *optlen)
1374{ 397{
@@ -1413,23 +436,6 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1413 return 0; 436 return 0;
1414} 437}
1415 438
1416int udp_getsockopt(struct sock *sk, int level, int optname,
1417 char __user *optval, int __user *optlen)
1418{
1419 if (level == SOL_UDP || level == SOL_UDPLITE)
1420 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1421 return ip_getsockopt(sk, level, optname, optval, optlen);
1422}
1423
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): only the fallback for non-UDP levels
 * differs (compat_ip_getsockopt() instead of ip_getsockopt()).
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1433/** 439/**
1434 * udp_poll - wait for a UDP event. 440 * udp_poll - wait for a UDP event.
1435 * @file - file struct 441 * @file - file struct
@@ -1474,36 +480,6 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1474 480
1475} 481}
1476 482
1477DEFINE_PROTO_INUSE(udp)
1478
1479struct proto udp_prot = {
1480 .name = "UDP",
1481 .owner = THIS_MODULE,
1482 .close = udp_lib_close,
1483 .connect = ip4_datagram_connect,
1484 .disconnect = udp_disconnect,
1485 .ioctl = udp_ioctl,
1486 .destroy = udp_destroy_sock,
1487 .setsockopt = udp_setsockopt,
1488 .getsockopt = udp_getsockopt,
1489 .sendmsg = udp_sendmsg,
1490 .recvmsg = udp_recvmsg,
1491 .sendpage = udp_sendpage,
1492 .backlog_rcv = udp_queue_rcv_skb,
1493 .hash = udp_lib_hash,
1494 .unhash = udp_lib_unhash,
1495 .get_port = udp_v4_get_port,
1496 .memory_allocated = &udp_memory_allocated,
1497 .sysctl_mem = sysctl_udp_mem,
1498 .sysctl_wmem = &sysctl_udp_wmem_min,
1499 .sysctl_rmem = &sysctl_udp_rmem_min,
1500 .obj_size = sizeof(struct udp_sock),
1501#ifdef CONFIG_COMPAT
1502 .compat_setsockopt = compat_udp_setsockopt,
1503 .compat_getsockopt = compat_udp_getsockopt,
1504#endif
1505 REF_PROTO_INUSE(udp)
1506};
1507 483
1508/* ------------------------------------------------------------------------ */ 484/* ------------------------------------------------------------------------ */
1509#ifdef CONFIG_PROC_FS 485#ifdef CONFIG_PROC_FS
@@ -1636,62 +612,6 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
1636 proc_net_remove(&init_net, afinfo->name); 612 proc_net_remove(&init_net, afinfo->name);
1637 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 613 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
1638} 614}
1639
1640/* ------------------------------------------------------------------------ */
1641static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1642{
1643 struct inet_sock *inet = inet_sk(sp);
1644 __be32 dest = inet->daddr;
1645 __be32 src = inet->rcv_saddr;
1646 __u16 destp = ntohs(inet->dport);
1647 __u16 srcp = ntohs(inet->sport);
1648
1649 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1650 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1651 bucket, src, srcp, dest, destp, sp->sk_state,
1652 atomic_read(&sp->sk_wmem_alloc),
1653 atomic_read(&sp->sk_rmem_alloc),
1654 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1655 atomic_read(&sp->sk_refcnt), sp);
1656}
1657
1658int udp4_seq_show(struct seq_file *seq, void *v)
1659{
1660 if (v == SEQ_START_TOKEN)
1661 seq_printf(seq, "%-127s\n",
1662 " sl local_address rem_address st tx_queue "
1663 "rx_queue tr tm->when retrnsmt uid timeout "
1664 "inode");
1665 else {
1666 char tmpbuf[129];
1667 struct udp_iter_state *state = seq->private;
1668
1669 udp4_format_sock(v, tmpbuf, state->bucket);
1670 seq_printf(seq, "%-127s\n", tmpbuf);
1671 }
1672 return 0;
1673}
1674
1675/* ------------------------------------------------------------------------ */
1676static struct file_operations udp4_seq_fops;
1677static struct udp_seq_afinfo udp4_seq_afinfo = {
1678 .owner = THIS_MODULE,
1679 .name = "udp",
1680 .family = AF_INET,
1681 .hashtable = udp_hash,
1682 .seq_show = udp4_seq_show,
1683 .seq_fops = &udp4_seq_fops,
1684};
1685
1686int __init udp4_proc_init(void)
1687{
1688 return udp_proc_register(&udp4_seq_afinfo);
1689}
1690
1691void udp4_proc_exit(void)
1692{
1693 udp_proc_unregister(&udp4_seq_afinfo);
1694}
1695#endif /* CONFIG_PROC_FS */ 615#endif /* CONFIG_PROC_FS */
1696 616
1697void __init udp_init(void) 617void __init udp_init(void)
@@ -1718,8 +638,6 @@ EXPORT_SYMBOL(udp_hash);
1718EXPORT_SYMBOL(udp_hash_lock); 638EXPORT_SYMBOL(udp_hash_lock);
1719EXPORT_SYMBOL(udp_ioctl); 639EXPORT_SYMBOL(udp_ioctl);
1720EXPORT_SYMBOL(udp_get_port); 640EXPORT_SYMBOL(udp_get_port);
1721EXPORT_SYMBOL(udp_prot);
1722EXPORT_SYMBOL(udp_sendmsg);
1723EXPORT_SYMBOL(udp_lib_getsockopt); 641EXPORT_SYMBOL(udp_lib_getsockopt);
1724EXPORT_SYMBOL(udp_lib_setsockopt); 642EXPORT_SYMBOL(udp_lib_setsockopt);
1725EXPORT_SYMBOL(udp_poll); 643EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c
new file mode 100644
index 000000000000..40978de7fb51
--- /dev/null
+++ b/net/ipv4/udp_ipv4.c
@@ -0,0 +1,1134 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * UDP for IPv4.
7 *
8 * For full credits, see net/ipv4/udp.c.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <asm/system.h>
17#include <asm/uaccess.h>
18#include <asm/ioctls.h>
19#include <linux/bootmem.h>
20#include <linux/types.h>
21#include <linux/fcntl.h>
22#include <linux/module.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/igmp.h>
26#include <linux/in.h>
27#include <linux/errno.h>
28#include <linux/timer.h>
29#include <linux/mm.h>
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <net/tcp_states.h>
33#include <linux/skbuff.h>
34#include <linux/proc_fs.h>
35#include <linux/seq_file.h>
36#include <net/net_namespace.h>
37#include <net/icmp.h>
38#include <net/route.h>
39#include <net/checksum.h>
40#include <net/xfrm.h>
41#include "udp_impl.h"
42
43int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
44{
45 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
46
47 return ( !ipv6_only_sock(sk2) &&
48 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
49 inet1->rcv_saddr == inet2->rcv_saddr ));
50}
51
52static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
53{
54 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
55}
56
57/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
58 * harder than this. -DaveM
59 */
60static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
61 __be16 sport, __be32 daddr, __be16 dport,
62 int dif, struct hlist_head udptable[])
63{
64 struct sock *sk, *result = NULL;
65 struct hlist_node *node;
66 unsigned short hnum = ntohs(dport);
67 int badness = -1;
68
69 read_lock(&udp_hash_lock);
70 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
71 struct inet_sock *inet = inet_sk(sk);
72
73 if (sk->sk_net == net && sk->sk_hash == hnum &&
74 !ipv6_only_sock(sk)) {
75 int score = (sk->sk_family == PF_INET ? 1 : 0);
76 if (inet->rcv_saddr) {
77 if (inet->rcv_saddr != daddr)
78 continue;
79 score+=2;
80 }
81 if (inet->daddr) {
82 if (inet->daddr != saddr)
83 continue;
84 score+=2;
85 }
86 if (inet->dport) {
87 if (inet->dport != sport)
88 continue;
89 score+=2;
90 }
91 if (sk->sk_bound_dev_if) {
92 if (sk->sk_bound_dev_if != dif)
93 continue;
94 score+=2;
95 }
96 if (score == 9) {
97 result = sk;
98 break;
99 } else if (score > badness) {
100 result = sk;
101 badness = score;
102 }
103 }
104 }
105 if (result)
106 sock_hold(result);
107 read_unlock(&udp_hash_lock);
108 return result;
109}
110
111static inline struct sock *udp_v4_mcast_next(struct sock *sk,
112 __be16 loc_port, __be32 loc_addr,
113 __be16 rmt_port, __be32 rmt_addr,
114 int dif)
115{
116 struct hlist_node *node;
117 struct sock *s = sk;
118 unsigned short hnum = ntohs(loc_port);
119
120 sk_for_each_from(s, node) {
121 struct inet_sock *inet = inet_sk(s);
122
123 if (s->sk_hash != hnum ||
124 (inet->daddr && inet->daddr != rmt_addr) ||
125 (inet->dport != rmt_port && inet->dport) ||
126 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
127 ipv6_only_sock(s) ||
128 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
129 continue;
130 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
131 continue;
132 goto found;
133 }
134 s = NULL;
135found:
136 return s;
137}
138
139/*
140 * This routine is called by the ICMP module when it gets some
141 * sort of error condition. If err < 0 then the socket should
142 * be closed and the error returned to the user. If err > 0
143 * it's just the icmp type << 8 | icmp code.
144 * Header points to the ip header of the error packet. We move
145 * on past this. Then (as it used to claim before adjustment)
146 * header points to the first 8 bytes of the udp header. We need
147 * to find the appropriate port.
148 */
149
150void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
151{
152 struct inet_sock *inet;
153 struct iphdr *iph = (struct iphdr*)skb->data;
154 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
155 const int type = icmp_hdr(skb)->type;
156 const int code = icmp_hdr(skb)->code;
157 struct sock *sk;
158 int harderr;
159 int err;
160
161 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
162 iph->saddr, uh->source, skb->dev->ifindex, udptable);
163 if (sk == NULL) {
164 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
165 return; /* No socket for error */
166 }
167
168 err = 0;
169 harderr = 0;
170 inet = inet_sk(sk);
171
172 switch (type) {
173 default:
174 case ICMP_TIME_EXCEEDED:
175 err = EHOSTUNREACH;
176 break;
177 case ICMP_SOURCE_QUENCH:
178 goto out;
179 case ICMP_PARAMETERPROB:
180 err = EPROTO;
181 harderr = 1;
182 break;
183 case ICMP_DEST_UNREACH:
184 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
185 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
186 err = EMSGSIZE;
187 harderr = 1;
188 break;
189 }
190 goto out;
191 }
192 err = EHOSTUNREACH;
193 if (code <= NR_ICMP_UNREACH) {
194 harderr = icmp_err_convert[code].fatal;
195 err = icmp_err_convert[code].errno;
196 }
197 break;
198 }
199
200 /*
201 * RFC1122: OK. Passes ICMP errors back to application, as per
202 * 4.1.3.3.
203 */
204 if (!inet->recverr) {
205 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
206 goto out;
207 } else {
208 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
209 }
210 sk->sk_err = err;
211 sk->sk_error_report(sk);
212out:
213 sock_put(sk);
214}
215
216void udp_err(struct sk_buff *skb, u32 info)
217{
218 __udp4_lib_err(skb, info, udp_hash);
219}
220
221/*
222 * Throw away all pending data and cancel the corking. Socket is locked.
223 */
224static void udp_flush_pending_frames(struct sock *sk)
225{
226 struct udp_sock *up = udp_sk(sk);
227
228 if (up->pending) {
229 up->len = 0;
230 up->pending = 0;
231 ip_flush_pending_frames(sk);
232 }
233}
234
235/**
236 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
237 * @sk: socket we are sending on
238 * @skb: sk_buff containing the filled-in UDP header
239 * (checksum field must be zeroed out)
240 */
241static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
242 __be32 src, __be32 dst, int len )
243{
244 unsigned int offset;
245 struct udphdr *uh = udp_hdr(skb);
246 __wsum csum = 0;
247
248 if (skb_queue_len(&sk->sk_write_queue) == 1) {
249 /*
250 * Only one fragment on the socket.
251 */
252 skb->csum_start = skb_transport_header(skb) - skb->head;
253 skb->csum_offset = offsetof(struct udphdr, check);
254 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
255 } else {
256 /*
257 * HW-checksum won't work as there are two or more
258 * fragments on the socket so that all csums of sk_buffs
259 * should be together
260 */
261 offset = skb_transport_offset(skb);
262 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
263
264 skb->ip_summed = CHECKSUM_NONE;
265
266 skb_queue_walk(&sk->sk_write_queue, skb) {
267 csum = csum_add(csum, skb->csum);
268 }
269
270 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
271 if (uh->check == 0)
272 uh->check = CSUM_MANGLED_0;
273 }
274}
275
276/*
277 * Push out all pending data as one UDP datagram. Socket is locked.
278 */
279static int udp_push_pending_frames(struct sock *sk)
280{
281 struct udp_sock *up = udp_sk(sk);
282 struct inet_sock *inet = inet_sk(sk);
283 struct flowi *fl = &inet->cork.fl;
284 struct sk_buff *skb;
285 struct udphdr *uh;
286 int err = 0;
287 int is_udplite = IS_UDPLITE(sk);
288 __wsum csum = 0;
289
290 /* Grab the skbuff where UDP header space exists. */
291 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
292 goto out;
293
294 /*
295 * Create a UDP header
296 */
297 uh = udp_hdr(skb);
298 uh->source = fl->fl_ip_sport;
299 uh->dest = fl->fl_ip_dport;
300 uh->len = htons(up->len);
301 uh->check = 0;
302
303 if (is_udplite) /* UDP-Lite */
304 csum = udplite_csum_outgoing(sk, skb);
305
306 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
307
308 skb->ip_summed = CHECKSUM_NONE;
309 goto send;
310
311 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
312
313 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
314 goto send;
315
316 } else /* `normal' UDP */
317 csum = udp_csum_outgoing(sk, skb);
318
319 /* add protocol-dependent pseudo-header */
320 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
321 sk->sk_protocol, csum );
322 if (uh->check == 0)
323 uh->check = CSUM_MANGLED_0;
324
325send:
326 err = ip_push_pending_frames(sk);
327out:
328 up->len = 0;
329 up->pending = 0;
330 if (!err)
331 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
332 return err;
333}
334
335int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
336 size_t len)
337{
338 struct inet_sock *inet = inet_sk(sk);
339 struct udp_sock *up = udp_sk(sk);
340 int ulen = len;
341 struct ipcm_cookie ipc;
342 struct rtable *rt = NULL;
343 int free = 0;
344 int connected = 0;
345 __be32 daddr, faddr, saddr;
346 __be16 dport;
347 u8 tos;
348 int err, is_udplite = IS_UDPLITE(sk);
349 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
350 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
351
352 if (len > 0xFFFF)
353 return -EMSGSIZE;
354
355 /*
356 * Check the flags.
357 */
358
359 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
360 return -EOPNOTSUPP;
361
362 ipc.opt = NULL;
363
364 if (up->pending) {
365 /*
366 * There are pending frames.
367 * The socket lock must be held while it's corked.
368 */
369 lock_sock(sk);
370 if (likely(up->pending)) {
371 if (unlikely(up->pending != AF_INET)) {
372 release_sock(sk);
373 return -EINVAL;
374 }
375 goto do_append_data;
376 }
377 release_sock(sk);
378 }
379 ulen += sizeof(struct udphdr);
380
381 /*
382 * Get and verify the address.
383 */
384 if (msg->msg_name) {
385 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
386 if (msg->msg_namelen < sizeof(*usin))
387 return -EINVAL;
388 if (usin->sin_family != AF_INET) {
389 if (usin->sin_family != AF_UNSPEC)
390 return -EAFNOSUPPORT;
391 }
392
393 daddr = usin->sin_addr.s_addr;
394 dport = usin->sin_port;
395 if (dport == 0)
396 return -EINVAL;
397 } else {
398 if (sk->sk_state != TCP_ESTABLISHED)
399 return -EDESTADDRREQ;
400 daddr = inet->daddr;
401 dport = inet->dport;
402 /* Open fast path for connected socket.
403 Route will not be used, if at least one option is set.
404 */
405 connected = 1;
406 }
407 ipc.addr = inet->saddr;
408
409 ipc.oif = sk->sk_bound_dev_if;
410 if (msg->msg_controllen) {
411 err = ip_cmsg_send(msg, &ipc);
412 if (err)
413 return err;
414 if (ipc.opt)
415 free = 1;
416 connected = 0;
417 }
418 if (!ipc.opt)
419 ipc.opt = inet->opt;
420
421 saddr = ipc.addr;
422 ipc.addr = faddr = daddr;
423
424 if (ipc.opt && ipc.opt->srr) {
425 if (!daddr)
426 return -EINVAL;
427 faddr = ipc.opt->faddr;
428 connected = 0;
429 }
430 tos = RT_TOS(inet->tos);
431 if (sock_flag(sk, SOCK_LOCALROUTE) ||
432 (msg->msg_flags & MSG_DONTROUTE) ||
433 (ipc.opt && ipc.opt->is_strictroute)) {
434 tos |= RTO_ONLINK;
435 connected = 0;
436 }
437
438 if (ipv4_is_multicast(daddr)) {
439 if (!ipc.oif)
440 ipc.oif = inet->mc_index;
441 if (!saddr)
442 saddr = inet->mc_addr;
443 connected = 0;
444 }
445
446 if (connected)
447 rt = (struct rtable*)sk_dst_check(sk, 0);
448
449 if (rt == NULL) {
450 struct flowi fl = { .oif = ipc.oif,
451 .nl_u = { .ip4_u =
452 { .daddr = faddr,
453 .saddr = saddr,
454 .tos = tos } },
455 .proto = sk->sk_protocol,
456 .uli_u = { .ports =
457 { .sport = inet->sport,
458 .dport = dport } } };
459 security_sk_classify_flow(sk, &fl);
460 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
461 if (err) {
462 if (err == -ENETUNREACH)
463 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
464 goto out;
465 }
466
467 err = -EACCES;
468 if ((rt->rt_flags & RTCF_BROADCAST) &&
469 !sock_flag(sk, SOCK_BROADCAST))
470 goto out;
471 if (connected)
472 sk_dst_set(sk, dst_clone(&rt->u.dst));
473 }
474
475 if (msg->msg_flags&MSG_CONFIRM)
476 goto do_confirm;
477back_from_confirm:
478
479 saddr = rt->rt_src;
480 if (!ipc.addr)
481 daddr = ipc.addr = rt->rt_dst;
482
483 lock_sock(sk);
484 if (unlikely(up->pending)) {
485 /* The socket is already corked while preparing it. */
486 /* ... which is an evident application bug. --ANK */
487 release_sock(sk);
488
489 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
490 err = -EINVAL;
491 goto out;
492 }
493 /*
494 * Now cork the socket to pend data.
495 */
496 inet->cork.fl.fl4_dst = daddr;
497 inet->cork.fl.fl_ip_dport = dport;
498 inet->cork.fl.fl4_src = saddr;
499 inet->cork.fl.fl_ip_sport = inet->sport;
500 up->pending = AF_INET;
501
502do_append_data:
503 up->len += ulen;
504 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
505 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
506 sizeof(struct udphdr), &ipc, rt,
507 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
508 if (err)
509 udp_flush_pending_frames(sk);
510 else if (!corkreq)
511 err = udp_push_pending_frames(sk);
512 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
513 up->pending = 0;
514 release_sock(sk);
515
516out:
517 ip_rt_put(rt);
518 if (free)
519 kfree(ipc.opt);
520 if (!err)
521 return len;
522 /*
523 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
524 * ENOBUFS might not be good (it's not tunable per se), but otherwise
525 * we don't have a good statistic (IpOutDiscards but it can be too many
526 * things). We could add another new stat but at least for now that
527 * seems like overkill.
528 */
529 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
530 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
531 }
532 return err;
533
534do_confirm:
535 dst_confirm(&rt->u.dst);
536 if (!(msg->msg_flags&MSG_PROBE) || len)
537 goto back_from_confirm;
538 err = 0;
539 goto out;
540}
541
542int udp_sendpage(struct sock *sk, struct page *page, int offset,
543 size_t size, int flags)
544{
545 struct udp_sock *up = udp_sk(sk);
546 int ret;
547
548 if (!up->pending) {
549 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
550
551 /* Call udp_sendmsg to specify destination address which
552 * sendpage interface can't pass.
553 * This will succeed only when the socket is connected.
554 */
555 ret = udp_sendmsg(NULL, sk, &msg, 0);
556 if (ret < 0)
557 return ret;
558 }
559
560 lock_sock(sk);
561
562 if (unlikely(!up->pending)) {
563 release_sock(sk);
564
565 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
566 return -EINVAL;
567 }
568
569 ret = ip_append_page(sk, page, offset, size, flags);
570 if (ret == -EOPNOTSUPP) {
571 release_sock(sk);
572 return sock_no_sendpage(sk->sk_socket, page, offset,
573 size, flags);
574 }
575 if (ret < 0) {
576 udp_flush_pending_frames(sk);
577 goto out;
578 }
579
580 up->len += size;
581 if (!(up->corkflag || (flags&MSG_MORE)))
582 ret = udp_push_pending_frames(sk);
583 if (!ret)
584 ret = size;
585out:
586 release_sock(sk);
587 return ret;
588}
589
590/*
591 * This should be easy, if there is something there we
592 * return it, otherwise we block.
593 */
594
595int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
596 size_t len, int noblock, int flags, int *addr_len)
597{
598 struct inet_sock *inet = inet_sk(sk);
599 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
600 struct sk_buff *skb;
601 unsigned int ulen, copied;
602 int peeked;
603 int err;
604 int is_udplite = IS_UDPLITE(sk);
605
606 /*
607 * Check any passed addresses
608 */
609 if (addr_len)
610 *addr_len=sizeof(*sin);
611
612 if (flags & MSG_ERRQUEUE)
613 return ip_recv_error(sk, msg, len);
614
615try_again:
616 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
617 &peeked, &err);
618 if (!skb)
619 goto out;
620
621 ulen = skb->len - sizeof(struct udphdr);
622 copied = len;
623 if (copied > ulen)
624 copied = ulen;
625 else if (copied < ulen)
626 msg->msg_flags |= MSG_TRUNC;
627
628 /*
629 * If checksum is needed at all, try to do it while copying the
630 * data. If the data is truncated, or if we only want a partial
631 * coverage checksum (UDP-Lite), do it before the copy.
632 */
633
634 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
635 if (udp_lib_checksum_complete(skb))
636 goto csum_copy_err;
637 }
638
639 if (skb_csum_unnecessary(skb))
640 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
641 msg->msg_iov, copied );
642 else {
643 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
644
645 if (err == -EINVAL)
646 goto csum_copy_err;
647 }
648
649 if (err)
650 goto out_free;
651
652 if (!peeked)
653 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
654
655 sock_recv_timestamp(msg, sk, skb);
656
657 /* Copy the address. */
658 if (sin)
659 {
660 sin->sin_family = AF_INET;
661 sin->sin_port = udp_hdr(skb)->source;
662 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
663 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
664 }
665 if (inet->cmsg_flags)
666 ip_cmsg_recv(msg, skb);
667
668 err = copied;
669 if (flags & MSG_TRUNC)
670 err = ulen;
671
672out_free:
673 lock_sock(sk);
674 skb_free_datagram(sk, skb);
675 release_sock(sk);
676out:
677 return err;
678
679csum_copy_err:
680 lock_sock(sk);
681 if (!skb_kill_datagram(sk, skb, flags))
682 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
683 release_sock(sk);
684
685 if (noblock)
686 return -EAGAIN;
687 goto try_again;
688}
689
690
691/* returns:
692 * -1: error
693 * 0: success
694 * >0: "udp encap" protocol resubmission
695 *
696 * Note that in the success and error cases, the skb is assumed to
697 * have either been requeued or freed.
698 */
699int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
700{
701 struct udp_sock *up = udp_sk(sk);
702 int rc;
703 int is_udplite = IS_UDPLITE(sk);
704
705 /*
706 * Charge it to the socket, dropping if the queue is full.
707 */
708 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
709 goto drop;
710 nf_reset(skb);
711
712 if (up->encap_type) {
713 /*
714 * This is an encapsulation socket so pass the skb to
715 * the socket's udp_encap_rcv() hook. Otherwise, just
716 * fall through and pass this up the UDP socket.
717 * up->encap_rcv() returns the following value:
718 * =0 if skb was successfully passed to the encap
719 * handler or was discarded by it.
720 * >0 if skb should be passed on to UDP.
721 * <0 if skb should be resubmitted as proto -N
722 */
723
724 /* if we're overly short, let UDP handle it */
725 if (skb->len > sizeof(struct udphdr) &&
726 up->encap_rcv != NULL) {
727 int ret;
728
729 ret = (*up->encap_rcv)(sk, skb);
730 if (ret <= 0) {
731 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
732 is_udplite);
733 return -ret;
734 }
735 }
736
737 /* FALLTHROUGH -- it's a UDP Packet */
738 }
739
740 /*
741 * UDP-Lite specific tests, ignored on UDP sockets
742 */
743 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
744
745 /*
746 * MIB statistics other than incrementing the error count are
747 * disabled for the following two types of errors: these depend
748 * on the application settings, not on the functioning of the
749 * protocol stack as such.
750 *
751 * RFC 3828 here recommends (sec 3.3): "There should also be a
752 * way ... to ... at least let the receiving application block
753 * delivery of packets with coverage values less than a value
754 * provided by the application."
755 */
756 if (up->pcrlen == 0) { /* full coverage was set */
757 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
758 "%d while full coverage %d requested\n",
759 UDP_SKB_CB(skb)->cscov, skb->len);
760 goto drop;
761 }
762 /* The next case involves violating the min. coverage requested
763 * by the receiver. This is subtle: if receiver wants x and x is
764 * greater than the buffersize/MTU then receiver will complain
765 * that it wants x while sender emits packets of smaller size y.
766 * Therefore the above ...()->partial_cov statement is essential.
767 */
768 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
769 LIMIT_NETDEBUG(KERN_WARNING
770 "UDPLITE: coverage %d too small, need min %d\n",
771 UDP_SKB_CB(skb)->cscov, up->pcrlen);
772 goto drop;
773 }
774 }
775
776 if (sk->sk_filter) {
777 if (udp_lib_checksum_complete(skb))
778 goto drop;
779 }
780
781 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
782 /* Note that an ENOMEM error is charged twice */
783 if (rc == -ENOMEM)
784 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
785 goto drop;
786 }
787
788 return 0;
789
790drop:
791 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
792 kfree_skb(skb);
793 return -1;
794}
795
796/*
797 * Multicasts and broadcasts go to each listener.
798 *
799 * Note: called only from the BH handler context,
800 * so we don't need to lock the hashes.
801 */
802static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
803 struct udphdr *uh,
804 __be32 saddr, __be32 daddr,
805 struct hlist_head udptable[])
806{
807 struct sock *sk;
808 int dif;
809
810 read_lock(&udp_hash_lock);
811 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
812 dif = skb->dev->ifindex;
813 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
814 if (sk) {
815 struct sock *sknext = NULL;
816
817 do {
818 struct sk_buff *skb1 = skb;
819
820 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
821 uh->source, saddr, dif);
822 if (sknext)
823 skb1 = skb_clone(skb, GFP_ATOMIC);
824
825 if (skb1) {
826 int ret = 0;
827
828 bh_lock_sock_nested(sk);
829 if (!sock_owned_by_user(sk))
830 ret = udp_queue_rcv_skb(sk, skb1);
831 else
832 sk_add_backlog(sk, skb1);
833 bh_unlock_sock(sk);
834
835 if (ret > 0)
836 /* we should probably re-process instead
837 * of dropping packets here. */
838 kfree_skb(skb1);
839 }
840 sk = sknext;
841 } while (sknext);
842 } else
843 kfree_skb(skb);
844 read_unlock(&udp_hash_lock);
845 return 0;
846}
847
848/* Initialize UDP checksum. If exited with zero value (success),
849 * CHECKSUM_UNNECESSARY means, that no more checks are required.
850 * Otherwise, csum completion requires chacksumming packet body,
851 * including udp header and folding it to skb->csum.
852 */
853static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
854 int proto)
855{
856 const struct iphdr *iph;
857 int err;
858
859 UDP_SKB_CB(skb)->partial_cov = 0;
860 UDP_SKB_CB(skb)->cscov = skb->len;
861
862 if (IS_PROTO_UDPLITE(proto)) {
863 err = udplite_checksum_init(skb, uh);
864 if (err)
865 return err;
866 }
867
868 iph = ip_hdr(skb);
869 if (uh->check == 0) {
870 skb->ip_summed = CHECKSUM_UNNECESSARY;
871 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
872 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
873 proto, skb->csum))
874 skb->ip_summed = CHECKSUM_UNNECESSARY;
875 }
876 if (!skb_csum_unnecessary(skb))
877 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
878 skb->len, proto, 0);
879 /* Probably, we should checksum udp header (it should be in cache
880 * in any case) and data in tiny packets (< rx copybreak).
881 */
882
883 return 0;
884}
885
886/*
887 * All we need to do is get the socket, and then do a checksum.
888 */
889
890int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
891 int proto)
892{
893 struct sock *sk;
894 struct udphdr *uh = udp_hdr(skb);
895 unsigned short ulen;
896 struct rtable *rt = (struct rtable*)skb->dst;
897 __be32 saddr = ip_hdr(skb)->saddr;
898 __be32 daddr = ip_hdr(skb)->daddr;
899
900 /*
901 * Validate the packet.
902 */
903 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
904 goto drop; /* No space for header. */
905
906 ulen = ntohs(uh->len);
907 if (ulen > skb->len)
908 goto short_packet;
909
910 if (IS_PROTO_UDPLITE(proto)) {
911 /* UDP validates ulen. */
912 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
913 goto short_packet;
914 uh = udp_hdr(skb);
915 }
916
917 if (udp4_csum_init(skb, uh, proto))
918 goto csum_error;
919
920 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
921 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
922
923 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
924 uh->dest, inet_iif(skb), udptable);
925
926 if (sk != NULL) {
927 int ret = 0;
928 bh_lock_sock_nested(sk);
929 if (!sock_owned_by_user(sk))
930 ret = udp_queue_rcv_skb(sk, skb);
931 else
932 sk_add_backlog(sk, skb);
933 bh_unlock_sock(sk);
934 sock_put(sk);
935
936 /* a return value > 0 means to resubmit the input, but
937 * it wants the return to be -protocol, or 0
938 */
939 if (ret > 0)
940 return -ret;
941 return 0;
942 }
943
944 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
945 goto drop;
946 nf_reset(skb);
947
948 /* No socket. Drop packet silently, if checksum is wrong */
949 if (udp_lib_checksum_complete(skb))
950 goto csum_error;
951
952 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto));
953 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
954
955 /*
956 * Hmm. We got an UDP packet to a port to which we
957 * don't wanna listen. Ignore it.
958 */
959 kfree_skb(skb);
960 return 0;
961
962short_packet:
963 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
964 IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
965 NIPQUAD(saddr),
966 ntohs(uh->source),
967 ulen,
968 skb->len,
969 NIPQUAD(daddr),
970 ntohs(uh->dest));
971 goto drop;
972
973csum_error:
974 /*
975 * RFC1122: OK. Discards the bad packet silently (as far as
976 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
977 */
978 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
979 IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
980 NIPQUAD(saddr),
981 ntohs(uh->source),
982 NIPQUAD(daddr),
983 ntohs(uh->dest),
984 ulen);
985drop:
986 UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto));
987 kfree_skb(skb);
988 return 0;
989}
990
991int udp_rcv(struct sk_buff *skb)
992{
993 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
994}
995
/*
 * Socket destroy hook: discards any frames still corked on @sk while
 * holding the socket lock.  Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);

	return 0;
}
1003
1004int udp_setsockopt(struct sock *sk, int level, int optname,
1005 char __user *optval, int optlen)
1006{
1007 if (IS_SOL_UDPFAMILY(level))
1008 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1009 udp_push_pending_frames);
1010 return ip_setsockopt(sk, level, optname, optval, optlen);
1011}
1012
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): same dispatch, but non-UDP levels
 * go to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (!IS_SOL_UDPFAMILY(level))
		return compat_ip_setsockopt(sk, level, optname, optval, optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1023
1024int udp_getsockopt(struct sock *sk, int level, int optname,
1025 char __user *optval, int __user *optlen)
1026{
1027 if (IS_SOL_UDPFAMILY(level))
1028 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1029 return ip_getsockopt(sk, level, optname, optval, optlen);
1030}
1031
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): same dispatch, but non-UDP levels
 * go to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (!IS_SOL_UDPFAMILY(level))
		return compat_ip_getsockopt(sk, level, optname, optval, optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1041
1042/* ------------------------------------------------------------------------ */
1043DEFINE_PROTO_INUSE(udp)
1044
1045struct proto udp_prot = {
1046 .name = "UDP",
1047 .owner = THIS_MODULE,
1048 .close = udp_lib_close,
1049 .connect = ip4_datagram_connect,
1050 .disconnect = udp_disconnect,
1051 .ioctl = udp_ioctl,
1052 .destroy = udp_destroy_sock,
1053 .setsockopt = udp_setsockopt,
1054 .getsockopt = udp_getsockopt,
1055 .sendmsg = udp_sendmsg,
1056 .recvmsg = udp_recvmsg,
1057 .sendpage = udp_sendpage,
1058 .backlog_rcv = udp_queue_rcv_skb,
1059 .hash = udp_lib_hash,
1060 .unhash = udp_lib_unhash,
1061 .get_port = udp_v4_get_port,
1062 .memory_allocated = &udp_memory_allocated,
1063 .sysctl_mem = sysctl_udp_mem,
1064 .sysctl_wmem = &sysctl_udp_wmem_min,
1065 .sysctl_rmem = &sysctl_udp_rmem_min,
1066 .obj_size = sizeof(struct udp_sock),
1067#ifdef CONFIG_COMPAT
1068 .compat_setsockopt = compat_udp_setsockopt,
1069 .compat_getsockopt = compat_udp_getsockopt,
1070#endif
1071 REF_PROTO_INUSE(udp)
1072};
1073
1074/* ------------------------------------------------------------------------ */
1075static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1076{
1077 struct inet_sock *inet = inet_sk(sp);
1078 __be32 dest = inet->daddr;
1079 __be32 src = inet->rcv_saddr;
1080 __u16 destp = ntohs(inet->dport);
1081 __u16 srcp = ntohs(inet->sport);
1082
1083 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1084 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1085 bucket, src, srcp, dest, destp, sp->sk_state,
1086 atomic_read(&sp->sk_wmem_alloc),
1087 atomic_read(&sp->sk_rmem_alloc),
1088 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1089 atomic_read(&sp->sk_refcnt), sp);
1090}
1091
1092int udp4_seq_show(struct seq_file *seq, void *v)
1093{
1094 if (v == SEQ_START_TOKEN)
1095 seq_printf(seq, "%-127s\n",
1096 " sl local_address rem_address st tx_queue "
1097 "rx_queue tr tm->when retrnsmt uid timeout "
1098 "inode");
1099 else {
1100 char tmpbuf[129];
1101 struct udp_iter_state *state = seq->private;
1102
1103 udp4_format_sock(v, tmpbuf, state->bucket);
1104 seq_printf(seq, "%-127s\n", tmpbuf);
1105 }
1106 return 0;
1107}
1108
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
static struct file_operations udp4_seq_fops;

/* Description of the "udp" listing handed to the UDP proc helpers. */
static struct udp_seq_afinfo udp4_seq_afinfo = {
	.name		= "udp",
	.owner		= THIS_MODULE,
	.family		= AF_INET,
	.hashtable	= udp_hash,
	.seq_fops	= &udp4_seq_fops,
	.seq_show	= udp4_seq_show,
};

/* Register the listing; returns what udp_proc_register() returns. */
int __init udp4_proc_init(void)
{
	return udp_proc_register(&udp4_seq_afinfo);
}

/* Unregister the listing again. */
void udp4_proc_exit(void)
{
	udp_proc_unregister(&udp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
1131
/* Symbols exported from this file. */
EXPORT_SYMBOL(udp_sendmsg);
EXPORT_SYMBOL(udp_prot);
1134
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite_ipv4.c
index 001b881ca36f..001b881ca36f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite_ipv4.c